From 1e69d066aa7f762a4b242c0519818577b7222e4c Mon Sep 17 00:00:00 2001 From: haoyuying <18844182690@163.com> Date: Tue, 22 Mar 2022 17:32:14 +0800 Subject: [PATCH 001/117] add 10 segmentation model --- .../ann_resnet50_cityscapes/README.md | 182 +++++++ .../ann_resnet50_cityscapes/README_en.md | 184 +++++++ .../ann_resnet50_cityscapes/layers.py | 275 +++++++++++ .../ann_resnet50_cityscapes/module.py | 452 ++++++++++++++++++ .../ann_resnet50_cityscapes/resnet.py | 361 ++++++++++++++ .../ann_resnet50_voc/README.md | 182 +++++++ .../ann_resnet50_voc/README_en.md | 182 +++++++ .../ann_resnet50_voc/layers.py | 276 +++++++++++ .../ann_resnet50_voc/module.py | 452 ++++++++++++++++++ .../ann_resnet50_voc/resnet.py | 361 ++++++++++++++ .../danet_resnet50_cityscapes/README.md | 182 +++++++ .../danet_resnet50_cityscapes/README_en.md | 182 +++++++ .../danet_resnet50_cityscapes/layers.py | 349 ++++++++++++++ .../danet_resnet50_cityscapes/module.py | 239 +++++++++ .../danet_resnet50_cityscapes/resnet.py | 359 ++++++++++++++ .../danet_resnet50_voc/README.md | 182 +++++++ .../danet_resnet50_voc/README_en.md | 181 +++++++ .../danet_resnet50_voc/layers.py | 349 ++++++++++++++ .../danet_resnet50_voc/module.py | 245 ++++++++++ .../danet_resnet50_voc/resnet.py | 359 ++++++++++++++ .../isanet_resnet50_cityscapes/README.md | 182 +++++++ .../isanet_resnet50_cityscapes/README_en.md | 181 +++++++ .../isanet_resnet50_cityscapes/layers.py | 401 ++++++++++++++++ .../isanet_resnet50_cityscapes/module.py | 221 +++++++++ .../isanet_resnet50_cityscapes/resnet.py | 359 ++++++++++++++ .../isanet_resnet50_voc/README.md | 182 +++++++ .../isanet_resnet50_voc/README_en.md | 181 +++++++ .../isanet_resnet50_voc/layers.py | 401 ++++++++++++++++ .../isanet_resnet50_voc/module.py | 221 +++++++++ .../isanet_resnet50_voc/resnet.py | 359 ++++++++++++++ .../pspnet_resnet50_cityscapes/README.md | 182 +++++++ .../pspnet_resnet50_cityscapes/README_en.md | 181 +++++++ .../pspnet_resnet50_cityscapes/layers.py | 356 ++++++++++++++ .../pspnet_resnet50_cityscapes/module.py | 165 +++++++ .../pspnet_resnet50_cityscapes/resnet.py | 357 ++++++++++++++ .../pspnet_resnet50_voc/README.md | 182 +++++++ .../pspnet_resnet50_voc/README_en.md | 181 +++++++ .../pspnet_resnet50_voc/layers.py | 353 ++++++++++++++ .../pspnet_resnet50_voc/module.py | 165 +++++++ .../pspnet_resnet50_voc/resnet.py | 357 ++++++++++++++ .../stdc1_seg_cityscapes/README.md | 182 +++++++ .../stdc1_seg_cityscapes/README_en.md | 181 +++++++ .../stdc1_seg_cityscapes/layers.py | 357 ++++++++++++++ .../stdc1_seg_cityscapes/module.py | 235 +++++++++ .../stdc1_seg_cityscapes/stdcnet.py | 263 ++++++++++ .../stdc1_seg_voc/README.md | 182 +++++++ .../stdc1_seg_voc/README_en.md | 181 +++++++ .../stdc1_seg_voc/layers.py | 357 ++++++++++++++ .../stdc1_seg_voc/module.py | 235 +++++++++ .../stdc1_seg_voc/stdcnet.py | 262 ++++++++++ 50 files changed, 13136 insertions(+) create mode 100644 modules/image/semantic_segmentation/ann_resnet50_cityscapes/README.md create mode 100644 modules/image/semantic_segmentation/ann_resnet50_cityscapes/README_en.md create mode 100644 modules/image/semantic_segmentation/ann_resnet50_cityscapes/layers.py create mode 100644 modules/image/semantic_segmentation/ann_resnet50_cityscapes/module.py create mode 100644 modules/image/semantic_segmentation/ann_resnet50_cityscapes/resnet.py create mode 100644 modules/image/semantic_segmentation/ann_resnet50_voc/README.md create mode 100644 modules/image/semantic_segmentation/ann_resnet50_voc/README_en.md create mode 100644 
modules/image/semantic_segmentation/ann_resnet50_voc/layers.py create mode 100644 modules/image/semantic_segmentation/ann_resnet50_voc/module.py create mode 100644 modules/image/semantic_segmentation/ann_resnet50_voc/resnet.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_cityscapes/README.md create mode 100644 modules/image/semantic_segmentation/danet_resnet50_cityscapes/README_en.md create mode 100644 modules/image/semantic_segmentation/danet_resnet50_cityscapes/layers.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_cityscapes/module.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_cityscapes/resnet.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_voc/README.md create mode 100644 modules/image/semantic_segmentation/danet_resnet50_voc/README_en.md create mode 100644 modules/image/semantic_segmentation/danet_resnet50_voc/layers.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_voc/module.py create mode 100644 modules/image/semantic_segmentation/danet_resnet50_voc/resnet.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README.md create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README_en.md create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_cityscapes/layers.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_cityscapes/module.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_cityscapes/resnet.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_voc/README.md create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_voc/README_en.md create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_voc/layers.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_voc/module.py create mode 100644 modules/image/semantic_segmentation/isanet_resnet50_voc/resnet.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README.md create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README_en.md create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/layers.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/module.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/resnet.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_voc/README.md create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_voc/README_en.md create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_voc/layers.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_voc/module.py create mode 100644 modules/image/semantic_segmentation/pspnet_resnet50_voc/resnet.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_cityscapes/README.md create mode 100644 modules/image/semantic_segmentation/stdc1_seg_cityscapes/README_en.md create mode 100644 modules/image/semantic_segmentation/stdc1_seg_cityscapes/layers.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_cityscapes/module.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_cityscapes/stdcnet.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_voc/README.md create mode 100644 modules/image/semantic_segmentation/stdc1_seg_voc/README_en.md create mode 100644 
modules/image/semantic_segmentation/stdc1_seg_voc/layers.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_voc/module.py create mode 100644 modules/image/semantic_segmentation/stdc1_seg_voc/stdcnet.py diff --git a/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README.md b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README.md new file mode 100644 index 000000000..ddbc7cd6a --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README.md @@ -0,0 +1,182 @@ +# ann_resnet50_cityscapes + +|模型名称|ann_resnet50_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ann_resnet50vd| +|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|228MB| +|指标|-| +|最新更新日期|2022-03-22| + +## 一、模型基本信息 + + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ann](https://arxiv.org/pdf/1908.07678.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ann_resnet50_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ann_resnet50_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ann_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ann_resnet50_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = 
cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ann_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README_en.md b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README_en.md new file mode 100644 index 000000000..43c29951a --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/README_en.md @@ -0,0 +1,184 @@ +# ann_resnet50_cityscapes + +|Module Name|ann_resnet50_cityscapes| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ann_resnet50vd| +|Dataset|Cityscapes| +|Fine-tuning supported or not|Yes| +|Module Size|228MB| +|Data indicators|-| +|Latest update date|2022-03-22| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +
+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ann](https://arxiv.org/pdf/1908.07678.pdf) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ann_resnet50_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ann_resnet50_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ann_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ann_resnet50_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ann_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/ann_resnet50_cityscapes/layers.py b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/layers.py new file mode 100644 index 000000000..083c8d2fa --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/layers.py @@ -0,0 +1,275 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
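+# Shared layer utilities used by this module: a SyncBatchNorm helper that falls back
+# to BatchNorm2D on CPU, ConvBN/ConvBNReLU blocks, a depthwise-separable convolution,
+# an activation lookup wrapper, an ASPP module, and an auxiliary-loss head.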
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None): + return paddle.add(x, y, name) diff --git a/modules/image/semantic_segmentation/ann_resnet50_cityscapes/module.py b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/module.py new file mode 100644 index 000000000..d892c47c7 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/module.py @@ -0,0 +1,452 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from ann_resnet50_cityscapes.resnet import ResNet50_vd +import ann_resnet50_cityscapes.layers as layers + +@moduleinfo( + name="ann_resnet50_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="ANNResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class ANN(nn.Layer): + """ + The ANN implementation based on PaddlePaddle. + + The original article refers to + Zhen, Zhu, et al. "Asymmetric Non-local Neural Networks for Semantic Segmentation" + (https://arxiv.org/pdf/1908.07678.pdf). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. + key_value_channels (int, optional): The key and value channels of self-attention map in both AFNB and APNB modules. + Default: 256. + inter_channels (int, optional): Both input and output channels of APNB modules. Default: 512. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 
+ pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (2, 3), + key_value_channels: int = 256, + inter_channels: int = 512, + psp_size: Tuple[int] = (1, 3, 6, 8), + align_corners: bool = False, + pretrained: str = None): + super(ANN, self).__init__() + + self.backbone = ResNet50_vd() + backbone_channels = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + + self.head = ANNHead(num_classes, backbone_indices, backbone_channels, + key_value_channels, inter_channels, psp_size) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + + +class ANNHead(nn.Layer): + """ + The ANNHead implementation. + + It mainly consists of AFNB and APNB modules. + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. + The first index will be taken as low-level features; the second one will be + taken as high-level features in AFNB module. Usually backbone consists of four + downsampling stage, such as ResNet, and return an output of each stage. If it is (2, 3), + it means taking feature map of the third stage and the fourth stage in backbone. + backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. + key_value_channels (int): The key and value channels of self-attention map in both AFNB and APNB modules. + inter_channels (int): Both input and output channels of APNB modules. + psp_size (tuple): The out size of pooled feature maps. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. 
Default: False + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int], + backbone_channels: Tuple[int], + key_value_channels: int, + inter_channels: int, + psp_size: Tuple[int], + enable_auxiliary_loss: bool = False): + super().__init__() + + low_in_channels = backbone_channels[0] + high_in_channels = backbone_channels[1] + + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + repeat_sizes=([1]), + psp_size=psp_size) + + self.context = nn.Sequential( + layers.ConvBNReLU( + in_channels=high_in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1), + APNB( + in_channels=inter_channels, + out_channels=inter_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + repeat_sizes=([1]), + psp_size=psp_size)) + + self.cls = nn.Conv2D( + in_channels=inter_channels, out_channels=num_classes, kernel_size=1) + self.auxlayer = layers.AuxLayer( + in_channels=low_in_channels, + inter_channels=low_in_channels // 2, + out_channels=num_classes, + dropout_prob=0.05) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + logit_list = [] + low_level_x = feat_list[self.backbone_indices[0]] + high_level_x = feat_list[self.backbone_indices[1]] + x = self.fusion(low_level_x, high_level_x) + x = self.context(x) + logit = self.cls(x) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(low_level_x) + logit_list.append(auxiliary_logit) + + return logit_list + + +class AFNB(nn.Layer): + """ + Asymmetric Fusion Non-local Block. + + Args: + low_in_channels (int): Low-level-feature channels. + high_in_channels (int): High-level-feature channels. + out_channels (int): Out channels of AFNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + dropout_prob (float): The dropout rate of output. + repeat_sizes (tuple, optional): The number of AFNB modules. Default: ([1]). + psp_size (tuple. optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + """ + + def __init__(self, + low_in_channels: int, + high_in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + dropout_prob: float, + repeat_sizes: Tuple[int] = ([1]), + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_AFNB(low_in_channels, high_in_channels, + key_channels, value_channels, out_channels, + size) for size in repeat_sizes + ]) + self.conv_bn = layers.ConvBN( + in_channels=out_channels + high_in_channels, + out_channels=out_channels, + kernel_size=1) + self.dropout = nn.Dropout(p=dropout_prob) + + def forward(self, low_feats: List[paddle.Tensor], high_feats: List[paddle.Tensor]) -> paddle.Tensor: + priors = [stage(low_feats, high_feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, high_feats], axis=1)) + output = self.dropout(output) + + return output + + +class APNB(nn.Layer): + """ + Asymmetric Pyramid Non-local Block. + + Args: + in_channels (int): The input channels of APNB module. 
+ out_channels (int): Out channels of APNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + dropout_prob (float): The dropout rate of output. + repeat_sizes (tuple, optional): The number of AFNB modules. Default: ([1]). + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + """ + + def __init__(self, + in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + dropout_prob: float, + repeat_sizes: Tuple[int] = ([1]), + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_APNB(in_channels, out_channels, key_channels, + value_channels, size) + for size in repeat_sizes + ]) + self.conv_bn = layers.ConvBNReLU( + in_channels=in_channels * 2, + out_channels=out_channels, + kernel_size=1) + self.dropout = nn.Dropout(p=dropout_prob) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + priors = [stage(x) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, x], axis=1)) + output = self.dropout(output) + + return output + + +def _pp_module(x: paddle.Tensor, psp_size: List[int]) -> paddle.Tensor: + n, c, h, w = x.shape + priors = [] + for size in psp_size: + feat = F.adaptive_avg_pool2d(x, size) + feat = paddle.reshape(feat, shape=(0, c, -1)) + priors.append(feat) + center = paddle.concat(priors, axis=-1) + return center + + +class SelfAttentionBlock_AFNB(nn.Layer): + """ + Self-Attention Block for AFNB module. + + Args: + low_in_channels (int): Low-level-feature channels. + high_in_channels (int): High-level-feature channels. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + out_channels (int, optional): Out channels of AFNB module. Default: None. + scale (int, optional): Pooling size. Default: 1. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + low_in_channels: int, + high_in_channels: int, + key_channels: int, + value_channels: int, + out_channels: int = None, + scale: int = 1, + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.scale = scale + self.in_channels = low_in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = high_in_channels + self.pool = nn.MaxPool2D(scale) + self.f_key = layers.ConvBNReLU( + in_channels=low_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_query = layers.ConvBNReLU( + in_channels=high_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_value = nn.Conv2D( + in_channels=low_in_channels, + out_channels=value_channels, + kernel_size=1) + + self.W = nn.Conv2D( + in_channels=value_channels, + out_channels=out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, low_feats: List[paddle.Tensor], high_feats: List[paddle.Tensor]) -> paddle.Tensor: + batch_size, _, h, w = high_feats.shape + + value = self.f_value(low_feats) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, (0, 2, 1)) + + query = self.f_query(high_feats) + query = paddle.reshape(query, shape=(0, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(low_feats) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + hf_shape = paddle.shape(high_feats) + context = paddle.reshape( + context, shape=[0, self.value_channels, hf_shape[2], hf_shape[3]]) + + context = self.W(context) + + return context + + +class SelfAttentionBlock_APNB(nn.Layer): + """ + Self-Attention Block for APNB module. + + Args: + in_channels (int): The input channels of APNB module. + out_channels (int): The out channels of APNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + scale (int, optional): Pooling size. Default: 1. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + scale: int = 1, + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.scale = scale + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + self.pool = nn.MaxPool2D(scale) + self.f_key = layers.ConvBNReLU( + in_channels=self.in_channels, + out_channels=self.key_channels, + kernel_size=1) + self.f_query = self.f_key + self.f_value = nn.Conv2D( + in_channels=self.in_channels, + out_channels=self.value_channels, + kernel_size=1) + self.W = nn.Conv2D( + in_channels=self.value_channels, + out_channels=self.out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + batch_size, _, h, w = x.shape + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, perm=(0, 2, 1)) + + query = self.f_query(x) + query = paddle.reshape(query, shape=(0, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(x) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + + x_shape = paddle.shape(x) + context = paddle.reshape( + context, shape=[0, self.value_channels, x_shape[2], x_shape[3]]) + context = self.W(context) + + return context diff --git a/modules/image/semantic_segmentation/ann_resnet50_cityscapes/resnet.py b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/resnet.py new file mode 100644 index 000000000..efa7ba570 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_cityscapes/resnet.py @@ -0,0 +1,361 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
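+# ResNet50_vd backbone for the ANN head: forward() returns the feature map of every
+# residual stage, and the head selects stages through backbone_indices (default (2, 3)).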
+ +from typing import Union, List, Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ann_resnet50_cityscapes.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first 
or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. + + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
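+                    # With output_stride=8, stages 3 and 4 replace striding with dilation
+                    # rates 2 and 4, keeping the feature maps at 1/8 of the input size;
+                    # output_stride=16 only dilates the last stage.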
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ann_resnet50_voc/README.md b/modules/image/semantic_segmentation/ann_resnet50_voc/README.md new file mode 100644 index 000000000..bd9139911 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_voc/README.md @@ -0,0 +1,182 @@ +# ann_resnet50_voc + +|模型名称|ann_resnet50_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ann_resnet50vd| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| +|模型大小|228MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +
+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ann](https://arxiv.org/pdf/1908.07678.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ann_resnet50_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ann_resnet50_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ann_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m ann_resnet50_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return 
base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ann_resnet50_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/ann_resnet50_voc/README_en.md b/modules/image/semantic_segmentation/ann_resnet50_voc/README_en.md new file mode 100644 index 000000000..2226a22d6 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_voc/README_en.md @@ -0,0 +1,182 @@ +# ann_resnet50_voc + +|Module Name|ann_resnet50_voc| +| :--- | :---: | +|Category|Image Segmentation| +|Network|ann_resnet50vd| +|Dataset|PascalVOC2012| +|Fine-tuning supported or not|Yes| +|Module Size|228MB| +|Data indicators|-| +|Latest update date|2022-03-22| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +
+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ann](https://arxiv.org/pdf/1908.07678.pdf) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install ann_resnet50_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the ann_resnet50_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='ann_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='ann_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m ann_resnet50_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ann_resnet50_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/ann_resnet50_voc/layers.py b/modules/image/semantic_segmentation/ann_resnet50_voc/layers.py new file mode 100644 index 000000000..8060d63d2 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_voc/layers.py @@ -0,0 +1,276 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
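+# This file collects the helper layers shared by module.py and resnet.py in this directory:
+# a SyncBatchNorm wrapper that falls back to BatchNorm2D on CPU, basic ConvBN/ConvBNReLU blocks,
+# a depthwise-separable conv, an activation wrapper, ASPP and the auxiliary-loss head (AuxLayer).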
+from typing import Union, List, Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None) -> paddle.Tensor: + return paddle.add(x, y, name) diff --git a/modules/image/semantic_segmentation/ann_resnet50_voc/module.py b/modules/image/semantic_segmentation/ann_resnet50_voc/module.py new file mode 100644 index 000000000..f0218dde7 --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_voc/module.py @@ -0,0 +1,452 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from ann_resnet50_voc.resnet import ResNet50_vd +import ann_resnet50_voc.layers as layers + +@moduleinfo( + name="ann_resnet50_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="ANNResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class ANN(nn.Layer): + """ + The ANN implementation based on PaddlePaddle. + + The original article refers to + Zhen, Zhu, et al. "Asymmetric Non-local Neural Networks for Semantic Segmentation" + (https://arxiv.org/pdf/1908.07678.pdf). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. + key_value_channels (int, optional): The key and value channels of self-attention map in both AFNB and APNB modules. + Default: 256. + inter_channels (int, optional): Both input and output channels of APNB modules. Default: 512. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 
+ pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (2, 3), + key_value_channels: int = 256, + inter_channels: int = 512, + psp_size: Tuple[int] = (1, 3, 6, 8), + align_corners: bool = False, + pretrained: str = None): + super(ANN, self).__init__() + + self.backbone = ResNet50_vd() + backbone_channels = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + + self.head = ANNHead(num_classes, backbone_indices, backbone_channels, + key_value_channels, inter_channels, psp_size) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + + +class ANNHead(nn.Layer): + """ + The ANNHead implementation. + + It mainly consists of AFNB and APNB modules. + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. + The first index will be taken as low-level features; the second one will be + taken as high-level features in AFNB module. Usually backbone consists of four + downsampling stage, such as ResNet, and return an output of each stage. If it is (2, 3), + it means taking feature map of the third stage and the fourth stage in backbone. + backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. + key_value_channels (int): The key and value channels of self-attention map in both AFNB and APNB modules. + inter_channels (int): Both input and output channels of APNB modules. + psp_size (tuple): The out size of pooled feature maps. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. 
Default: False + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int], + backbone_channels: Tuple[int], + key_value_channels: int, + inter_channels: int, + psp_size: Tuple[int], + enable_auxiliary_loss: bool = False): + super().__init__() + + low_in_channels = backbone_channels[0] + high_in_channels = backbone_channels[1] + + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + repeat_sizes=([1]), + psp_size=psp_size) + + self.context = nn.Sequential( + layers.ConvBNReLU( + in_channels=high_in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1), + APNB( + in_channels=inter_channels, + out_channels=inter_channels, + key_channels=key_value_channels, + value_channels=key_value_channels, + dropout_prob=0.05, + repeat_sizes=([1]), + psp_size=psp_size)) + + self.cls = nn.Conv2D( + in_channels=inter_channels, out_channels=num_classes, kernel_size=1) + self.auxlayer = layers.AuxLayer( + in_channels=low_in_channels, + inter_channels=low_in_channels // 2, + out_channels=num_classes, + dropout_prob=0.05) + + self.backbone_indices = backbone_indices + self.enable_auxiliary_loss = enable_auxiliary_loss + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + logit_list = [] + low_level_x = feat_list[self.backbone_indices[0]] + high_level_x = feat_list[self.backbone_indices[1]] + x = self.fusion(low_level_x, high_level_x) + x = self.context(x) + logit = self.cls(x) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_logit = self.auxlayer(low_level_x) + logit_list.append(auxiliary_logit) + + return logit_list + + +class AFNB(nn.Layer): + """ + Asymmetric Fusion Non-local Block. + + Args: + low_in_channels (int): Low-level-feature channels. + high_in_channels (int): High-level-feature channels. + out_channels (int): Out channels of AFNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + dropout_prob (float): The dropout rate of output. + repeat_sizes (tuple, optional): The number of AFNB modules. Default: ([1]). + psp_size (tuple. optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + """ + + def __init__(self, + low_in_channels: int, + high_in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + dropout_prob: float, + repeat_sizes: Tuple[int] = ([1]), + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_AFNB(low_in_channels, high_in_channels, + key_channels, value_channels, out_channels, + size) for size in repeat_sizes + ]) + self.conv_bn = layers.ConvBN( + in_channels=out_channels + high_in_channels, + out_channels=out_channels, + kernel_size=1) + self.dropout = nn.Dropout(p=dropout_prob) + + def forward(self, low_feats: List[paddle.Tensor], high_feats: List[paddle.Tensor]) -> paddle.Tensor: + priors = [stage(low_feats, high_feats) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, high_feats], axis=1)) + output = self.dropout(output) + + return output + + +class APNB(nn.Layer): + """ + Asymmetric Pyramid Non-local Block. + + Args: + in_channels (int): The input channels of APNB module. 
+ out_channels (int): Out channels of APNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + dropout_prob (float): The dropout rate of output. + repeat_sizes (tuple, optional): The number of AFNB modules. Default: ([1]). + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). + """ + + def __init__(self, + in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + dropout_prob: float, + repeat_sizes: Tuple[int] = ([1]), + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.psp_size = psp_size + self.stages = nn.LayerList([ + SelfAttentionBlock_APNB(in_channels, out_channels, key_channels, + value_channels, size) + for size in repeat_sizes + ]) + self.conv_bn = layers.ConvBNReLU( + in_channels=in_channels * 2, + out_channels=out_channels, + kernel_size=1) + self.dropout = nn.Dropout(p=dropout_prob) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + priors = [stage(x) for stage in self.stages] + context = priors[0] + for i in range(1, len(priors)): + context += priors[i] + + output = self.conv_bn(paddle.concat([context, x], axis=1)) + output = self.dropout(output) + + return output + + +def _pp_module(x: paddle.Tensor, psp_size: List[int]) -> paddle.Tensor: + n, c, h, w = x.shape + priors = [] + for size in psp_size: + feat = F.adaptive_avg_pool2d(x, size) + feat = paddle.reshape(feat, shape=(0, c, -1)) + priors.append(feat) + center = paddle.concat(priors, axis=-1) + return center + + +class SelfAttentionBlock_AFNB(nn.Layer): + """ + Self-Attention Block for AFNB module. + + Args: + low_in_channels (int): Low-level-feature channels. + high_in_channels (int): High-level-feature channels. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + out_channels (int, optional): Out channels of AFNB module. Default: None. + scale (int, optional): Pooling size. Default: 1. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + low_in_channels: int, + high_in_channels: int, + key_channels: int, + value_channels: int, + out_channels: int = None, + scale: int = 1, + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.scale = scale + self.in_channels = low_in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + if out_channels == None: + self.out_channels = high_in_channels + self.pool = nn.MaxPool2D(scale) + self.f_key = layers.ConvBNReLU( + in_channels=low_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_query = layers.ConvBNReLU( + in_channels=high_in_channels, + out_channels=key_channels, + kernel_size=1) + self.f_value = nn.Conv2D( + in_channels=low_in_channels, + out_channels=value_channels, + kernel_size=1) + + self.W = nn.Conv2D( + in_channels=value_channels, + out_channels=out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, low_feats: List[paddle.Tensor], high_feats: List[paddle.Tensor]) -> paddle.Tensor: + batch_size, _, h, w = high_feats.shape + + value = self.f_value(low_feats) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, (0, 2, 1)) + + query = self.f_query(high_feats) + query = paddle.reshape(query, shape=(0, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(low_feats) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + hf_shape = paddle.shape(high_feats) + context = paddle.reshape( + context, shape=[0, self.value_channels, hf_shape[2], hf_shape[3]]) + + context = self.W(context) + + return context + + +class SelfAttentionBlock_APNB(nn.Layer): + """ + Self-Attention Block for APNB module. + + Args: + in_channels (int): The input channels of APNB module. + out_channels (int): The out channels of APNB module. + key_channels (int): The key channels in self-attention block. + value_channels (int): The value channels in self-attention block. + scale (int, optional): Pooling size. Default: 1. + psp_size (tuple, optional): The out size of pooled feature maps. Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + in_channels: int, + out_channels: int, + key_channels: int, + value_channels: int, + scale: int = 1, + psp_size: Tuple[int] = (1, 3, 6, 8)): + super().__init__() + + self.scale = scale + self.in_channels = in_channels + self.out_channels = out_channels + self.key_channels = key_channels + self.value_channels = value_channels + self.pool = nn.MaxPool2D(scale) + self.f_key = layers.ConvBNReLU( + in_channels=self.in_channels, + out_channels=self.key_channels, + kernel_size=1) + self.f_query = self.f_key + self.f_value = nn.Conv2D( + in_channels=self.in_channels, + out_channels=self.value_channels, + kernel_size=1) + self.W = nn.Conv2D( + in_channels=self.value_channels, + out_channels=self.out_channels, + kernel_size=1) + + self.psp_size = psp_size + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + batch_size, _, h, w = x.shape + if self.scale > 1: + x = self.pool(x) + + value = self.f_value(x) + value = _pp_module(value, self.psp_size) + value = paddle.transpose(value, perm=(0, 2, 1)) + + query = self.f_query(x) + query = paddle.reshape(query, shape=(0, self.key_channels, -1)) + query = paddle.transpose(query, perm=(0, 2, 1)) + + key = self.f_key(x) + key = _pp_module(key, self.psp_size) + + sim_map = paddle.matmul(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, perm=(0, 2, 1)) + + x_shape = paddle.shape(x) + context = paddle.reshape( + context, shape=[0, self.value_channels, x_shape[2], x_shape[3]]) + context = self.W(context) + + return context diff --git a/modules/image/semantic_segmentation/ann_resnet50_voc/resnet.py b/modules/image/semantic_segmentation/ann_resnet50_voc/resnet.py new file mode 100644 index 000000000..949f180ce --- /dev/null +++ b/modules/image/semantic_segmentation/ann_resnet50_voc/resnet.py @@ -0,0 +1,361 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Union, List, Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import ann_resnet50_voc.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or 
stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. + + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int]=(1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
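+ # dilation_dict maps a stage index to its dilation rate, e.g. {2: 2, 3: 4} when output_stride
+ # is 8 and {3: 2} when it is 16; for those stages the block keeps stride 1 and enlarges the
+ # receptive field through dilation instead of downsampling.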
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model diff --git a/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README.md b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README.md new file mode 100644 index 000000000..a88ce5a82 --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README.md @@ -0,0 +1,182 @@ +# danet_resnet50_cityscapes + +|模型名称|danet_resnet50_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|danet_resnet50vd| +|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|272MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[ann](https://arxiv.org/pdf/1908.07678.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install danet_resnet50_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用danet_resnet50_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='danet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m danet_resnet50_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + 
 data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/danet_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README_en.md b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README_en.md new file mode 100644 index 000000000..9794b0f3a --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/README_en.md @@ -0,0 +1,182 @@ +# danet_resnet50_cityscapes + +|Module Name|danet_resnet50_cityscapes| +| :--- | :---: | +|Category|Image Segmentation| +|Network|danet_resnet50vd| +|Dataset|Cityscapes| +|Fine-tuning supported or not|Yes| +|Module Size|272MB| +|Data indicators|-| +|Latest update date|2022-03-21| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [ginet](https://arxiv.org/pdf/2009.06160) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install danet_resnet50_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the danet_resnet50_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='danet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m danet_resnet50_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/danet_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/danet_resnet50_cityscapes/layers.py b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/layers.py new file mode 100644 index 000000000..b6d7c005e --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/layers.py @@ -0,0 +1,349 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
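+# Helper layers used by the DANet module in this directory: a SyncBatchNorm wrapper with a
+# BatchNorm2D fallback on CPU, ConvBNLayer/BottleneckBlock building blocks for the ResNet
+# backbone, basic ConvBN/ConvBNReLU blocks, an activation wrapper and ASPP.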
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + + + diff --git a/modules/image/semantic_segmentation/danet_resnet50_cityscapes/module.py b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/module.py new file mode 100644 index 000000000..9bb6e5626 --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/module.py @@ -0,0 +1,239 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from danet_resnet50_voc.resnet import ResNet50_vd +import danet_resnet50_voc.layers as L + + +@moduleinfo( + name="danet_resnet50_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="DANetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class DANet(nn.Layer): + """ + The DANet implementation based on PaddlePaddle. + + The original article refers to + Fu, jun, et al. "Dual Attention Network for Scene Segmentation" + (https://arxiv.org/pdf/1809.02983.pdf) + + Args: + num_classes (int): The unique number of target classes. + backbone (Paddle.nn.Layer): A backbone network. 
+ backbone_indices (tuple): The values in the tuple indicate the indices of + output of backbone. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (2, 3), + align_corners: bool = False, + pretrained: str = None): + super(DANet, self).__init__() + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = DAHead(num_classes=num_classes, in_channels=in_channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + if not self.training: + logit_list = [logit_list[0]] + + logit_list = [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners, + align_mode=1) for logit in logit_list + ] + return logit_list + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + +class DAHead(nn.Layer): + """ + The Dual attention head. + + Args: + num_classes (int): The unique number of target classes. + in_channels (tuple): The number of input channels. 
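+ Note: although annotated as int in the signature, `in_channels` is indexed with [-1], so only
+ the channel count of the last backbone feature map is used and inter_channels is in_channels // 4.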
+ """ + + def __init__(self, num_classes: int, in_channels: int): + super().__init__() + in_channels = in_channels[-1] + inter_channels = in_channels // 4 + + self.channel_conv = L.ConvBNReLU(in_channels, inter_channels, 3) + self.position_conv = L.ConvBNReLU(in_channels, inter_channels, 3) + self.pam = PAM(inter_channels) + self.cam = CAM(inter_channels) + self.conv1 = L.ConvBNReLU(inter_channels, inter_channels, 3) + self.conv2 = L.ConvBNReLU(inter_channels, inter_channels, 3) + + self.aux_head = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(in_channels, num_classes, 1)) + + self.aux_head_pam = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + self.aux_head_cam = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + self.cls_head = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + feats = feat_list[-1] + channel_feats = self.channel_conv(feats) + channel_feats = self.cam(channel_feats) + channel_feats = self.conv1(channel_feats) + + position_feats = self.position_conv(feats) + position_feats = self.pam(position_feats) + position_feats = self.conv2(position_feats) + + feats_sum = position_feats + channel_feats + logit = self.cls_head(feats_sum) + + if not self.training: + return [logit] + + cam_logit = self.aux_head_cam(channel_feats) + pam_logit = self.aux_head_cam(position_feats) + aux_logit = self.aux_head(feats) + return [logit, cam_logit, pam_logit, aux_logit] + + +class PAM(nn.Layer): + """Position attention module.""" + + def __init__(self, in_channels: int): + super().__init__() + mid_channels = in_channels // 8 + self.mid_channels = mid_channels + self.in_channels = in_channels + + self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1) + + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x_shape = paddle.shape(x) + + # query: n, h * w, c1 + query = self.query_conv(x) + query = paddle.reshape(query, (0, self.mid_channels, -1)) + query = paddle.transpose(query, (0, 2, 1)) + + # key: n, c1, h * w + key = self.key_conv(x) + key = paddle.reshape(key, (0, self.mid_channels, -1)) + + # sim: n, h * w, h * w + sim = paddle.bmm(query, key) + sim = F.softmax(sim, axis=-1) + + value = self.value_conv(x) + value = paddle.reshape(value, (0, self.in_channels, -1)) + sim = paddle.transpose(sim, (0, 2, 1)) + + # feat: from (n, c2, h * w) -> (n, c2, h, w) + feat = paddle.bmm(value, sim) + feat = paddle.reshape(feat, + (0, self.in_channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out + + +class CAM(nn.Layer): + """Channel attention module.""" + + def __init__(self, channels: int): + super().__init__() + + self.channels = channels + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x_shape = paddle.shape(x) + # query: n, c, h * w + query = paddle.reshape(x, (0, self.channels, -1)) + # key: n, h * w, c + key = paddle.reshape(x, (0, self.channels, -1)) + key = paddle.transpose(key, (0, 2, 1)) + + # sim: n, c, c + sim = paddle.bmm(query, key) + # The danet author claims that this can avoid gradient divergence + sim = paddle.max( + 
sim, axis=-1, keepdim=True).tile([1, 1, self.channels]) - sim + sim = F.softmax(sim, axis=-1) + + # feat: from (n, c, h * w) to (n, c, h, w) + value = paddle.reshape(x, (0, self.channels, -1)) + feat = paddle.bmm(sim, value) + feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out diff --git a/modules/image/semantic_segmentation/danet_resnet50_cityscapes/resnet.py b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/resnet.py new file mode 100644 index 000000000..12102a3fe --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_cityscapes/resnet.py @@ -0,0 +1,359 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union, List, Tuple + +import paddle.nn as nn +import ann_resnet50_voc.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 
else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. 
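+        data_format (str, optional): The data layout of the input, 'NCHW' or 'NHWC'. Default: 'NCHW'.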
+ + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/danet_resnet50_voc/README.md b/modules/image/semantic_segmentation/danet_resnet50_voc/README.md new file mode 100644 index 000000000..8ee72c8c8 --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_voc/README.md @@ -0,0 +1,182 @@ +# danet_resnet50_voc + +|模型名称|danet_resnet50_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|danet_resnet50vd| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| +|模型大小|273MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[danet](https://arxiv.org/pdf/1809.02983.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install danet_resnet50_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用danet_resnet50_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='danet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m danet_resnet50_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + 
return base64.b64encode(data.tostring()).decode('utf8')
+
+    def base64_to_cv2(b64str):
+        data = base64.b64decode(b64str.encode('utf8'))
+        data = np.fromstring(data, np.uint8)
+        data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        return data
+
+    # 发送HTTP请求
+    org_im = cv2.imread('/PATH/TO/IMAGE')
+    data = {'images':[cv2_to_base64(org_im)]}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/danet_resnet50_voc"
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    mask = base64_to_cv2(r.json()["results"][0])
+    ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/danet_resnet50_voc/README_en.md b/modules/image/semantic_segmentation/danet_resnet50_voc/README_en.md
new file mode 100644
index 000000000..6fecdfc23
--- /dev/null
+++ b/modules/image/semantic_segmentation/danet_resnet50_voc/README_en.md
@@ -0,0 +1,181 @@
+# danet_resnet50_voc
+
+|Module Name|danet_resnet50_voc|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|danet_resnet50vd|
+|Dataset|PascalVOC2012|
+|Fine-tuning supported or not|Yes|
+|Module Size|273MB|
+|Data indicators|-|
+|Latest update date|2022-03-22|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [danet](https://arxiv.org/pdf/1809.02983.pdf) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install danet_resnet50_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the danet_resnet50_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='danet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='danet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m danet_resnet50_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/danet_resnet50_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/danet_resnet50_voc/layers.py b/modules/image/semantic_segmentation/danet_resnet50_voc/layers.py new file mode 100644 index 000000000..96b307dc8 --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_voc/layers.py @@ -0,0 +1,349 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
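+
+# Shared building blocks for the DANet module and its ResNet-vd backbone:
+# a SyncBatchNorm factory (falls back to nn.BatchNorm2D on CPU), ConvBNLayer,
+# a residual BottleneckBlock, ConvBN / ConvBNReLU / SeparableConvBNReLU wrappers,
+# an Activation lookup helper and an ASPP module.
+# Illustrative usage (padding='same' keeps the spatial size):
+#     conv = ConvBNReLU(in_channels=2048, out_channels=512, kernel_size=3)
+#     y = conv(paddle.rand([1, 2048, 64, 64]))   # -> [1, 512, 64, 64]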
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + 
out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + + Returns: + A callable object of Activation. + + Raises: + KeyError: When parameter `act` is not in the optional range. + + Examples: + + from paddleseg.models.common.activation import Activation + + relu = Activation("relu") + print(relu) + # + + sigmoid = Activation("sigmoid") + print(sigmoid) + # + + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool= False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + + + diff --git a/modules/image/semantic_segmentation/danet_resnet50_voc/module.py b/modules/image/semantic_segmentation/danet_resnet50_voc/module.py new file mode 100644 index 000000000..2dd4c60b9 --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_voc/module.py @@ -0,0 +1,245 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from danet_resnet50_voc.resnet import ResNet50_vd +import danet_resnet50_voc.layers as L + + +@moduleinfo( + name="danet_resnet50_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="DeepLabV3PResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class DANet(nn.Layer): + """ + The DANet implementation based on PaddlePaddle. + + The original article refers to + Fu, jun, et al. "Dual Attention Network for Scene Segmentation" + (https://arxiv.org/pdf/1809.02983.pdf) + + Args: + num_classes (int): The unique number of target classes. + backbone (Paddle.nn.Layer): A backbone network. 
+ backbone_indices (tuple): The values in the tuple indicate the indices of + output of backbone. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (2, 3), + align_corners: bool = False, + pretrained: str = None): + super(DANet, self).__init__() + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = DAHead(num_classes=num_classes, in_channels=in_channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + if not self.training: + logit_list = [logit_list[0]] + + logit_list = [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners, + align_mode=1) for logit in logit_list + ] + return logit_list + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + + +class DAHead(nn.Layer): + """ + The Dual attention head. + + Args: + num_classes (int): The unique number of target classes. + in_channels (tuple): The number of input channels. 
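+
+        Example (an illustrative shape sketch, not part of the module API; it
+        assumes a single 2048-channel backbone feature map):
+
+            import paddle
+            head = DAHead(num_classes=21, in_channels=[256, 2048])
+            feat = paddle.rand([1, 2048, 64, 128])
+            # train mode returns [logit, cam_logit, pam_logit, aux_logit];
+            # eval mode returns only the fused logit.
+            logits = head([feat])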
+ """ + + def __init__(self, num_classes: int, in_channels: int): + super().__init__() + in_channels = in_channels[-1] + inter_channels = in_channels // 4 + + self.channel_conv = L.ConvBNReLU(in_channels, inter_channels, 3) + self.position_conv = L.ConvBNReLU(in_channels, inter_channels, 3) + self.pam = PAM(inter_channels) + self.cam = CAM(inter_channels) + self.conv1 = L.ConvBNReLU(inter_channels, inter_channels, 3) + self.conv2 = L.ConvBNReLU(inter_channels, inter_channels, 3) + + self.aux_head = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(in_channels, num_classes, 1)) + + self.aux_head_pam = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + self.aux_head_cam = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + self.cls_head = nn.Sequential( + nn.Dropout2D(0.1), nn.Conv2D(inter_channels, num_classes, 1)) + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + feats = feat_list[-1] + channel_feats = self.channel_conv(feats) + channel_feats = self.cam(channel_feats) + channel_feats = self.conv1(channel_feats) + + position_feats = self.position_conv(feats) + position_feats = self.pam(position_feats) + position_feats = self.conv2(position_feats) + + feats_sum = position_feats + channel_feats + logit = self.cls_head(feats_sum) + + if not self.training: + return [logit] + + cam_logit = self.aux_head_cam(channel_feats) + pam_logit = self.aux_head_cam(position_feats) + aux_logit = self.aux_head(feats) + return [logit, cam_logit, pam_logit, aux_logit] + + +class PAM(nn.Layer): + """Position attention module.""" + + def __init__(self, in_channels: int): + super().__init__() + mid_channels = in_channels // 8 + self.mid_channels = mid_channels + self.in_channels = in_channels + + self.query_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.key_conv = nn.Conv2D(in_channels, mid_channels, 1, 1) + self.value_conv = nn.Conv2D(in_channels, in_channels, 1, 1) + + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x_shape = paddle.shape(x) + + # query: n, h * w, c1 + query = self.query_conv(x) + query = paddle.reshape(query, (0, self.mid_channels, -1)) + query = paddle.transpose(query, (0, 2, 1)) + + # key: n, c1, h * w + key = self.key_conv(x) + key = paddle.reshape(key, (0, self.mid_channels, -1)) + + # sim: n, h * w, h * w + sim = paddle.bmm(query, key) + sim = F.softmax(sim, axis=-1) + + value = self.value_conv(x) + value = paddle.reshape(value, (0, self.in_channels, -1)) + sim = paddle.transpose(sim, (0, 2, 1)) + + # feat: from (n, c2, h * w) -> (n, c2, h, w) + feat = paddle.bmm(value, sim) + feat = paddle.reshape(feat, + (0, self.in_channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out + + +class CAM(nn.Layer): + """Channel attention module.""" + + def __init__(self, channels: int): + super().__init__() + + self.channels = channels + self.gamma = self.create_parameter( + shape=[1], + dtype='float32', + default_initializer=nn.initializer.Constant(0)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x_shape = paddle.shape(x) + # query: n, c, h * w + query = paddle.reshape(x, (0, self.channels, -1)) + # key: n, h * w, c + key = paddle.reshape(x, (0, self.channels, -1)) + key = paddle.transpose(key, (0, 2, 1)) + + # sim: n, c, c + sim = paddle.bmm(query, key) + # The danet author claims that this can avoid gradient divergence + sim = paddle.max( + 
sim, axis=-1, keepdim=True).tile([1, 1, self.channels]) - sim + sim = F.softmax(sim, axis=-1) + + # feat: from (n, c, h * w) to (n, c, h, w) + value = paddle.reshape(x, (0, self.channels, -1)) + feat = paddle.bmm(sim, value) + feat = paddle.reshape(feat, (0, self.channels, x_shape[2], x_shape[3])) + + out = self.gamma * feat + x + return out + + + + + diff --git a/modules/image/semantic_segmentation/danet_resnet50_voc/resnet.py b/modules/image/semantic_segmentation/danet_resnet50_voc/resnet.py new file mode 100644 index 000000000..12102a3fe --- /dev/null +++ b/modules/image/semantic_segmentation/danet_resnet50_voc/resnet.py @@ -0,0 +1,359 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union, List, Tuple + +import paddle.nn as nn +import ann_resnet50_voc.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + 
data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. 
+ + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README.md b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README.md new file mode 100644 index 000000000..f4a52885d --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README.md @@ -0,0 +1,182 @@ +# isanet_resnet50_cityscapes + +|模型名称|isanet_resnet50_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|isanet_resnet50vd| +|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|217MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[isanet](https://arxiv.org/abs/1907.12273) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install isanet_resnet50_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用isanet_resnet50_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='isanet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m isanet_resnet50_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def 
cv2_to_base64(image):
+        data = cv2.imencode('.jpg', image)[1]
+        return base64.b64encode(data.tostring()).decode('utf8')
+
+    def base64_to_cv2(b64str):
+        data = base64.b64decode(b64str.encode('utf8'))
+        data = np.fromstring(data, np.uint8)
+        data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        return data
+
+    # 发送HTTP请求
+    org_im = cv2.imread('/PATH/TO/IMAGE')
+    data = {'images':[cv2_to_base64(org_im)]}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/isanet_resnet50_cityscapes"
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    mask = base64_to_cv2(r.json()["results"][0])
+    ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README_en.md b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README_en.md
new file mode 100644
index 000000000..ec784ba9f
--- /dev/null
+++ b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/README_en.md
@@ -0,0 +1,181 @@
+# isanet_resnet50_cityscapes
+
+|Module Name|isanet_resnet50_cityscapes|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|isanet_resnet50vd|
+|Dataset|Cityscapes|
+|Fine-tuning supported or not|Yes|
+|Module Size|217MB|
+|Data indicators|-|
+|Latest update date|2022-03-21|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [isanet](https://arxiv.org/abs/1907.12273) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install isanet_resnet50_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the isanet_resnet50_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='isanet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m isanet_resnet50_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/isanet_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/layers.py b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/layers.py new file mode 100644 index 000000000..3e42fb7f2 --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/layers.py @@ -0,0 +1,401 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+        out_channels (int): The number of output channels, and usually it is num_classes.
+        dropout_prob (float, optional): The drop rate. Default: 0.1.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 inter_channels: int,
+                 out_channels: int,
+                 dropout_prob: float = 0.1,
+                 **kwargs):
+        super().__init__()
+
+        self.conv_bn_relu = ConvBNReLU(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1,
+            **kwargs)
+
+        self.dropout = nn.Dropout(p=dropout_prob)
+
+        self.conv = nn.Conv2D(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        x = self.conv_bn_relu(x)
+        x = self.dropout(x)
+        x = self.conv(x)
+        return x
+
+
+class Add(nn.Layer):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None):
+        return paddle.add(x, y, name)
+
+
+class AttentionBlock(nn.Layer):
+    """General self-attention block/non-local block.
+
+    The original article refers to https://arxiv.org/abs/1706.03762.
+    Args:
+        key_in_channels (int): Input channels of key feature.
+        query_in_channels (int): Input channels of query feature.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether to share projection weights between the key
+            and query projections.
+        query_downsample (nn.Module): Query downsample module.
+        key_downsample (nn.Module): Key downsample module.
+        key_query_num_convs (int): Number of convs for key/query projection.
+        value_out_num_convs (int): Number of convs for value projection.
+        key_query_norm (bool): Whether to use BN for key/query projection.
+        value_out_norm (bool): Whether to use BN for value projection.
+        matmul_norm (bool): Whether to normalize the attention map by the square
+            root of channels.
+        with_out (bool): Whether to use an output projection.
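+
+    Examples:
+        # A minimal usage sketch (channel sizes are illustrative, not required values);
+        # it mirrors how SelfAttentionBlock in module.py drives this block with query == key.
+        block = AttentionBlock(
+            key_in_channels=64, query_in_channels=64, channels=32, out_channels=64,
+            share_key_query=False, query_downsample=None, key_downsample=None,
+            key_query_num_convs=2, value_out_num_convs=1, key_query_norm=True,
+            value_out_norm=False, matmul_norm=True, with_out=False)
+        x = paddle.rand([1, 64, 32, 32])   # NCHW feature map
+        out = block(x, x)                  # same spatial size, 64 output channels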
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out): + super(AttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.with_out = with_out + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm) + + self.value_project = self.build_project( + key_in_channels, + channels if self.with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm) + + if self.with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + def build_project(self, in_channels: int , channels: int, num_convs: int, use_conv_module: bool): + if use_conv_module: + convs = [ + ConvBNReLU( + in_channels=in_channels, + out_channels=channels, + kernel_size=1, + bias_attr=False) + ] + for _ in range(num_convs - 1): + convs.append( + ConvBNReLU( + in_channels=channels, + out_channels=channels, + kernel_size=1, + bias_attr=False)) + else: + convs = [nn.Conv2D(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2D(channels, channels, 1)) + + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats: paddle.Tensor, key_feats: paddle.Tensor) -> paddle.Tensor: + query_shape = paddle.shape(query_feats) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.flatten(2).transpose([0, 2, 1]) + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + + key = key.flatten(2) + value = value.flatten(2).transpose([0, 2, 1]) + sim_map = paddle.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-0.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, [0, 2, 1]) + + context = paddle.reshape( + context, [0, self.out_channels, query_shape[2], query_shape[3]]) + + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/module.py b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/module.py new file mode 100644 index 000000000..6b20ac094 --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/module.py @@ -0,0 +1,221 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from isanet_resnet50_cityscapes.resnet import ResNet50_vd +import isanet_resnet50_cityscapes.layers as layers + + +@moduleinfo( + name="isanet_resnet50_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="ISANetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class ISANet(nn.Layer): + """Interlaced Sparse Self-Attention for Semantic Segmentation. + + The original article refers to Lang Huang, et al. "Interlaced Sparse Self-Attention for Semantic Segmentation" + (https://arxiv.org/abs/1907.12273). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): The values in the tuple indicate the indices of output of backbone. + isa_channels (int): The channels of ISA Module. + down_factor (tuple): Divide the height and width dimension to (Ph, PW) groups. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (2, 3), + isa_channels: int = 256, + down_factor: Tuple[int] = (8, 8), + enable_auxiliary_loss: bool = True, + align_corners: bool = False, + pretrained: str = None): + super(ISANet, self).__init__() + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + self.head = ISAHead(num_classes, in_channels, isa_channels, down_factor, + enable_auxiliary_loss) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + logit_list = [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners, + align_mode=1) for logit in logit_list + ] + + return logit_list + + +class ISAHead(nn.Layer): + """ + The ISAHead. + + Args: + num_classes (int): The unique number of target classes. 
+ in_channels (tuple): The number of input channels. + isa_channels (int): The channels of ISA Module. + down_factor (tuple): Divide the height and width dimension to (Ph, PW) groups. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + """ + + def __init__(self, + num_classes: int, + in_channels: int, + isa_channels: int, + down_factor: Tuple[int], + enable_auxiliary_loss: bool): + super(ISAHead, self).__init__() + self.in_channels = in_channels[-1] + inter_channels = self.in_channels // 4 + self.inter_channels = inter_channels + self.down_factor = down_factor + self.enable_auxiliary_loss = enable_auxiliary_loss + self.in_conv = layers.ConvBNReLU( + self.in_channels, inter_channels, 3, bias_attr=False) + self.global_relation = SelfAttentionBlock(inter_channels, isa_channels) + self.local_relation = SelfAttentionBlock(inter_channels, isa_channels) + self.out_conv = layers.ConvBNReLU( + inter_channels * 2, inter_channels, 1, bias_attr=False) + self.cls = nn.Sequential( + nn.Dropout2D(p=0.1), nn.Conv2D(inter_channels, num_classes, 1)) + self.aux = nn.Sequential( + layers.ConvBNReLU( + in_channels=1024, + out_channels=256, + kernel_size=3, + bias_attr=False), nn.Dropout2D(p=0.1), + nn.Conv2D(256, num_classes, 1)) + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + C3, C4 = feat_list + x = self.in_conv(C4) + x_shape = paddle.shape(x) + P_h, P_w = self.down_factor + Q_h, Q_w = paddle.ceil(x_shape[2] / P_h).astype('int32'), paddle.ceil( + x_shape[3] / P_w).astype('int32') + pad_h, pad_w = (Q_h * P_h - x_shape[2]).astype('int32'), ( + Q_w * P_w - x_shape[3]).astype('int32') + if pad_h > 0 or pad_w > 0: + padding = paddle.concat([ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ], + axis=0) + feat = F.pad(x, padding) + else: + feat = x + + feat = feat.reshape([0, x_shape[1], Q_h, P_h, Q_w, P_w]) + feat = feat.transpose([0, 3, 5, 1, 2, + 4]).reshape([-1, self.inter_channels, Q_h, Q_w]) + feat = self.global_relation(feat) + + feat = feat.reshape([x_shape[0], P_h, P_w, x_shape[1], Q_h, Q_w]) + feat = feat.transpose([0, 4, 5, 3, 1, + 2]).reshape([-1, self.inter_channels, P_h, P_w]) + feat = self.local_relation(feat) + + feat = feat.reshape([x_shape[0], Q_h, Q_w, x_shape[1], P_h, P_w]) + feat = feat.transpose([0, 3, 1, 4, 2, 5]).reshape( + [0, self.inter_channels, P_h * Q_h, P_w * Q_w]) + if pad_h > 0 or pad_w > 0: + feat = paddle.slice( + feat, + axes=[2, 3], + starts=[pad_h // 2, pad_w // 2], + ends=[pad_h // 2 + x_shape[2], pad_w // 2 + x_shape[3]]) + + feat = self.out_conv(paddle.concat([feat, x], axis=1)) + output = self.cls(feat) + + if self.enable_auxiliary_loss: + auxout = self.aux(C3) + return [output, auxout] + else: + return [output] + + +class SelfAttentionBlock(layers.AttentionBlock): + """General self-attention block/non-local block. + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
+    """
+
+    def __init__(self, in_channels: int, channels: int):
+        super(SelfAttentionBlock, self).__init__(
+            key_in_channels=in_channels,
+            query_in_channels=in_channels,
+            channels=channels,
+            out_channels=in_channels,
+            share_key_query=False,
+            query_downsample=None,
+            key_downsample=None,
+            key_query_num_convs=2,
+            key_query_norm=True,
+            value_out_num_convs=1,
+            value_out_norm=False,
+            matmul_norm=True,
+            with_out=False)
+
+        self.output_project = self.build_project(
+            in_channels, in_channels, num_convs=1, use_conv_module=True)
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        context = super(SelfAttentionBlock, self).forward(x, x)
+        return self.output_project(context)
diff --git a/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/resnet.py b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/resnet.py
new file mode 100644
index 000000000..f7de1ee29
--- /dev/null
+++ b/modules/image/semantic_segmentation/isanet_resnet50_cityscapes/resnet.py
@@ -0,0 +1,359 @@
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Tuple
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+import isanet_resnet50_cityscapes.layers as layers
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 groups: int = 1,
+                 is_vd_mode: bool = False,
+                 act: str = None,
+                 data_format: str = 'NCHW'):
+        super(ConvBNLayer, self).__init__()
+        if dilation != 1 and kernel_size != 3:
+            raise RuntimeError("When the dilation isn't 1, " \
+                "the kernel_size should be 3.")
+
+        self.is_vd_mode = is_vd_mode
+        self._pool2d_avg = nn.AvgPool2D(
+            kernel_size=2,
+            stride=2,
+            padding=0,
+            ceil_mode=True,
+            data_format=data_format)
+        self._conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2 \
+                if dilation == 1 else dilation,
+            dilation=dilation,
+            groups=groups,
+            bias_attr=False,
+            data_format=data_format)
+
+        self._batch_norm = layers.SyncBatchNorm(
+            out_channels, data_format=data_format)
+        self._act_op = layers.Activation(act=act)
+
+    def forward(self, inputs: paddle.Tensor) -> paddle.Tensor:
+        if self.is_vd_mode:
+            inputs = self._pool2d_avg(inputs)
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        y = self._act_op(y)
+
+        return y
+
+
+class BottleneckBlock(nn.Layer):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 stride: int,
+                 shortcut: bool = True,
+                 if_first: bool = False,
+                 dilation: int = 1,
+                 data_format: str = 'NCHW'):
+        super(BottleneckBlock, self).__init__()
+
+        self.data_format = data_format
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            act='relu',
+            data_format=data_format)
+
+        self.dilation = dilation
+
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu',
+            
dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. 
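+
+    Examples:
+        # A minimal sketch (shapes are illustrative): build the dilated ResNet50_vd
+        # backbone used by ISANet and inspect the four stage outputs.
+        backbone = ResNet_vd(layers=50, output_stride=8)
+        feats = backbone(paddle.rand([1, 3, 512, 512]))
+        # backbone.feat_channels -> [256, 512, 1024, 2048]
+        # With output_stride=8, the last two stages keep 1/8 of the input resolution.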
+ + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model diff --git a/modules/image/semantic_segmentation/isanet_resnet50_voc/README.md b/modules/image/semantic_segmentation/isanet_resnet50_voc/README.md new file mode 100644 index 000000000..e7e56aa3e --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_voc/README.md @@ -0,0 +1,182 @@ +# isanet_resnet50_voc + +|模型名称|isanet_resnet50_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|isanet_resnet50vd| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| +|模型大小|217MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[isanet](https://arxiv.org/abs/1907.12273) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install isanet_resnet50_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用isanet_resnet50_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='isanet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m isanet_resnet50_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', 
image)[1]
+        return base64.b64encode(data.tostring()).decode('utf8')
+
+    def base64_to_cv2(b64str):
+        data = base64.b64decode(b64str.encode('utf8'))
+        data = np.fromstring(data, np.uint8)
+        data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        return data
+
+    # 发送HTTP请求
+    org_im = cv2.imread('/PATH/TO/IMAGE')
+    data = {'images':[cv2_to_base64(org_im)]}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/isanet_resnet50_voc"
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    mask = base64_to_cv2(r.json()["results"][0])
+    ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/isanet_resnet50_voc/README_en.md b/modules/image/semantic_segmentation/isanet_resnet50_voc/README_en.md
new file mode 100644
index 000000000..b80886911
--- /dev/null
+++ b/modules/image/semantic_segmentation/isanet_resnet50_voc/README_en.md
@@ -0,0 +1,181 @@
+# isanet_resnet50_voc
+
+|Module Name|isanet_resnet50_voc|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|isanet_resnet50vd|
+|Dataset|PascalVOC2012|
+|Fine-tuning supported or not|Yes|
+|Module Size|217MB|
+|Data indicators|-|
+|Latest update date|2022-03-22|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [isanet](https://arxiv.org/abs/1907.12273) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install isanet_resnet50_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the isanet_resnet50_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='isanet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='isanet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m isanet_resnet50_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/isanet_resnet50_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/isanet_resnet50_voc/layers.py b/modules/image/semantic_segmentation/isanet_resnet50_voc/layers.py new file mode 100644 index 000000000..7f6ee5748 --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_voc/layers.py @@ -0,0 +1,401 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+        out_channels (int): The number of output channels, and usually it is num_classes.
+        dropout_prob (float, optional): The drop rate. Default: 0.1.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 inter_channels: int,
+                 out_channels: int,
+                 dropout_prob: float = 0.1,
+                 **kwargs):
+        super().__init__()
+
+        self.conv_bn_relu = ConvBNReLU(
+            in_channels=in_channels,
+            out_channels=inter_channels,
+            kernel_size=3,
+            padding=1,
+            **kwargs)
+
+        self.dropout = nn.Dropout(p=dropout_prob)
+
+        self.conv = nn.Conv2D(
+            in_channels=inter_channels,
+            out_channels=out_channels,
+            kernel_size=1)
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        x = self.conv_bn_relu(x)
+        x = self.dropout(x)
+        x = self.conv(x)
+        return x
+
+
+class Add(nn.Layer):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None):
+        return paddle.add(x, y, name)
+
+
+class AttentionBlock(nn.Layer):
+    """General self-attention block/non-local block.
+
+    The original article refers to https://arxiv.org/abs/1706.03762.
+    Args:
+        key_in_channels (int): Input channels of key feature.
+        query_in_channels (int): Input channels of query feature.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether to share projection weights between the key
+            and query projections.
+        query_downsample (nn.Module): Query downsample module.
+        key_downsample (nn.Module): Key downsample module.
+        key_query_num_convs (int): Number of convs for key/query projection.
+        value_out_num_convs (int): Number of convs for value projection.
+        key_query_norm (bool): Whether to use BN for key/query projection.
+        value_out_norm (bool): Whether to use BN for value projection.
+        matmul_norm (bool): Whether to normalize the attention map by the square
+            root of channels.
+        with_out (bool): Whether to use an output projection.
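+
+    Examples:
+        # A minimal usage sketch (channel sizes are illustrative, not required values);
+        # it mirrors how SelfAttentionBlock in module.py drives this block with query == key.
+        block = AttentionBlock(
+            key_in_channels=64, query_in_channels=64, channels=32, out_channels=64,
+            share_key_query=False, query_downsample=None, key_downsample=None,
+            key_query_num_convs=2, value_out_num_convs=1, key_query_norm=True,
+            value_out_norm=False, matmul_norm=True, with_out=False)
+        x = paddle.rand([1, 64, 32, 32])   # NCHW feature map
+        out = block(x, x)                  # same spatial size, 64 output channels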
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out): + super(AttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.with_out = with_out + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm) + + self.value_project = self.build_project( + key_in_channels, + channels if self.with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm) + + if self.with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + def build_project(self, in_channels: int, channels: int, num_convs: int, use_conv_module: bool): + if use_conv_module: + convs = [ + ConvBNReLU( + in_channels=in_channels, + out_channels=channels, + kernel_size=1, + bias_attr=False) + ] + for _ in range(num_convs - 1): + convs.append( + ConvBNReLU( + in_channels=channels, + out_channels=channels, + kernel_size=1, + bias_attr=False)) + else: + convs = [nn.Conv2D(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2D(channels, channels, 1)) + + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats: paddle.Tensor, key_feats: paddle.Tensor) -> paddle.Tensor: + query_shape = paddle.shape(query_feats) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.flatten(2).transpose([0, 2, 1]) + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + + key = key.flatten(2) + value = value.flatten(2).transpose([0, 2, 1]) + sim_map = paddle.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-0.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + context = paddle.matmul(sim_map, value) + context = paddle.transpose(context, [0, 2, 1]) + + context = paddle.reshape( + context, [0, self.out_channels, query_shape[2], query_shape[3]]) + + if self.out_project is not None: + context = self.out_project(context) + return context diff --git a/modules/image/semantic_segmentation/isanet_resnet50_voc/module.py b/modules/image/semantic_segmentation/isanet_resnet50_voc/module.py new file mode 100644 index 000000000..ed92c1286 --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_voc/module.py @@ -0,0 +1,221 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from isanet_resnet50_voc.resnet import ResNet50_vd +import isanet_resnet50_voc.layers as layers + + +@moduleinfo( + name="isanet_resnet50_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="ISANetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class ISANet(nn.Layer): + """Interlaced Sparse Self-Attention for Semantic Segmentation. + + The original article refers to Lang Huang, et al. "Interlaced Sparse Self-Attention for Semantic Segmentation" + (https://arxiv.org/abs/1907.12273). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): The values in the tuple indicate the indices of output of backbone. + isa_channels (int): The channels of ISA Module. + down_factor (tuple): Divide the height and width dimension to (Ph, PW) groups. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (2, 3), + isa_channels: int = 256, + down_factor: Tuple[int] = (8, 8), + enable_auxiliary_loss: bool = True, + align_corners: bool = False, + pretrained: str = None): + super(ISANet, self).__init__() + + self.backbone = ResNet50_vd() + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + self.head = ISAHead(num_classes, in_channels, isa_channels, down_factor, + enable_auxiliary_loss) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + logit_list = [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners, + align_mode=1) for logit in logit_list + ] + + return logit_list + + +class ISAHead(nn.Layer): + """ + The ISAHead. + + Args: + num_classes (int): The unique number of target classes. 
+ in_channels (tuple): The number of input channels. + isa_channels (int): The channels of ISA Module. + down_factor (tuple): Divide the height and width dimension to (Ph, PW) groups. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + """ + + def __init__(self, + num_classes: int, + in_channels: Tuple[int], + isa_channels: int, + down_factor: Tuple[int], + enable_auxiliary_loss: bool): + super(ISAHead, self).__init__() + self.in_channels = in_channels[-1] + inter_channels = self.in_channels // 4 + self.inter_channels = inter_channels + self.down_factor = down_factor + self.enable_auxiliary_loss = enable_auxiliary_loss + self.in_conv = layers.ConvBNReLU( + self.in_channels, inter_channels, 3, bias_attr=False) + self.global_relation = SelfAttentionBlock(inter_channels, isa_channels) + self.local_relation = SelfAttentionBlock(inter_channels, isa_channels) + self.out_conv = layers.ConvBNReLU( + inter_channels * 2, inter_channels, 1, bias_attr=False) + self.cls = nn.Sequential( + nn.Dropout2D(p=0.1), nn.Conv2D(inter_channels, num_classes, 1)) + self.aux = nn.Sequential( + layers.ConvBNReLU( + in_channels=1024, + out_channels=256, + kernel_size=3, + bias_attr=False), nn.Dropout2D(p=0.1), + nn.Conv2D(256, num_classes, 1)) + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + C3, C4 = feat_list + x = self.in_conv(C4) + x_shape = paddle.shape(x) + P_h, P_w = self.down_factor + Q_h, Q_w = paddle.ceil(x_shape[2] / P_h).astype('int32'), paddle.ceil( + x_shape[3] / P_w).astype('int32') + pad_h, pad_w = (Q_h * P_h - x_shape[2]).astype('int32'), ( + Q_w * P_w - x_shape[3]).astype('int32') + if pad_h > 0 or pad_w > 0: + padding = paddle.concat([ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 + ], + axis=0) + feat = F.pad(x, padding) + else: + feat = x + + feat = feat.reshape([0, x_shape[1], Q_h, P_h, Q_w, P_w]) + feat = feat.transpose([0, 3, 5, 1, 2, + 4]).reshape([-1, self.inter_channels, Q_h, Q_w]) + feat = self.global_relation(feat) + + feat = feat.reshape([x_shape[0], P_h, P_w, x_shape[1], Q_h, Q_w]) + feat = feat.transpose([0, 4, 5, 3, 1, + 2]).reshape([-1, self.inter_channels, P_h, P_w]) + feat = self.local_relation(feat) + + feat = feat.reshape([x_shape[0], Q_h, Q_w, x_shape[1], P_h, P_w]) + feat = feat.transpose([0, 3, 1, 4, 2, 5]).reshape( + [0, self.inter_channels, P_h * Q_h, P_w * Q_w]) + if pad_h > 0 or pad_w > 0: + feat = paddle.slice( + feat, + axes=[2, 3], + starts=[pad_h // 2, pad_w // 2], + ends=[pad_h // 2 + x_shape[2], pad_w // 2 + x_shape[3]]) + + feat = self.out_conv(paddle.concat([feat, x], axis=1)) + output = self.cls(feat) + + if self.enable_auxiliary_loss: + auxout = self.aux(C3) + return [output, auxout] + else: + return [output] + + +class SelfAttentionBlock(layers.AttentionBlock): + """General self-attention block/non-local block. + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
+ """ + + def __init__(self, in_channels, channels): + super(SelfAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=False) + + self.output_project = self.build_project( + in_channels, in_channels, num_convs=1, use_conv_module=True) + + def forward(self, x): + context = super(SelfAttentionBlock, self).forward(x, x) + return self.output_project(context) diff --git a/modules/image/semantic_segmentation/isanet_resnet50_voc/resnet.py b/modules/image/semantic_segmentation/isanet_resnet50_voc/resnet.py new file mode 100644 index 000000000..39327564d --- /dev/null +++ b/modules/image/semantic_segmentation/isanet_resnet50_voc/resnet.py @@ -0,0 +1,359 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import isanet_resnet50_voc.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + 
in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. 
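+
+    Examples:
+        A minimal sketch of how this backbone is wired into the module above
+        (ResNet50_vd at the bottom of this file simply builds the 50-layer variant):
+
+            backbone = ResNet50_vd(output_stride=8)
+            feats = backbone(paddle.rand([1, 3, 512, 512]))
+            # len(feats) == 4, one feature map per stage, and
+            # backbone.feat_channels == [256, 512, 1024, 2048]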
+ + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
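+ # With output_stride 8 the last two stages keep stride 1 and use dilations
+ # 2 and 4 instead, so the returned features stay at 1/8 of the input resolution;
+ # multi_grid additionally scales the dilation inside the final stage.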
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README.md b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README.md new file mode 100644 index 000000000..4e9f9b513 --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README.md @@ -0,0 +1,182 @@ +# pspnet_resnet50_cityscapes + +|模型名称|pspnet_resnet50_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|pspnet_resnet50vd| +|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|390MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[pspnet](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install pspnet_resnet50_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用pspnet_resnet50_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='pspnet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m pspnet_resnet50_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import 
cv2
+ import base64
+
+ import numpy as np
+
+
+ def cv2_to_base64(image):
+     data = cv2.imencode('.jpg', image)[1]
+     return base64.b64encode(data.tostring()).decode('utf8')
+
+ def base64_to_cv2(b64str):
+     data = base64.b64decode(b64str.encode('utf8'))
+     data = np.fromstring(data, np.uint8)
+     data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+     return data
+
+ # 发送HTTP请求
+ org_im = cv2.imread('/PATH/TO/IMAGE')
+ data = {'images':[cv2_to_base64(org_im)]}
+ headers = {"Content-type": "application/json"}
+ url = "http://127.0.0.1:8866/predict/pspnet_resnet50_cityscapes"
+ r = requests.post(url=url, headers=headers, data=json.dumps(data))
+ mask = base64_to_cv2(r.json()["results"][0])
+ ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README_en.md b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README_en.md
new file mode 100644
index 000000000..975a84629
--- /dev/null
+++ b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/README_en.md
@@ -0,0 +1,181 @@
+# pspnet_resnet50_cityscapes
+
+|Module Name|pspnet_resnet50_cityscapes|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|pspnet_resnet50vd|
+|Dataset|Cityscapes|
+|Fine-tuning supported or not|Yes|
+|Module Size|390MB|
+|Data indicators|-|
+|Latest update date|2022-03-21|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [pspnet](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install pspnet_resnet50_cityscapes + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the pspnet_resnet50_cityscapes model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='pspnet_resnet50_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m pspnet_resnet50_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pspnet_resnet50_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/layers.py b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/layers.py new file mode 100644 index 000000000..af3c8765f --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/layers.py @@ -0,0 +1,356 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
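+
+# PPModule further down annotates bin_sizes as Tuple[int], so the typing import
+# is needed for that annotation to resolve.
+from typing import Tuple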
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None) -> paddle.Tensor: + return paddle.add(x, y, name) + +class PPModule(nn.Layer): + """ + Pyramid pooling module originally in PSPNet. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 2, 3, 6). + dim_reduction (bool, optional): A bool value represents if reducing dimension after pooling. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + bin_sizes: Tuple[int], + dim_reduction: bool, + align_corners: bool): + super().__init__() + + self.bin_sizes = bin_sizes + + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + + # we use dimension reduction after pooling mentioned in original implementation. + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_bn_relu2 = ConvBNReLU( + in_channels=in_channels + inter_channels * len(bin_sizes), + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels: int, out_channels: int, size: int): + """ + Create one pooling layer. + + In our implementation, we adopt the same dimension reduction as the original paper that might be + slightly different with other implementations. + + After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations + keep the channels to be same. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + size (int): The out size of the pooled layer. + + Returns: + conv (Tensor): A tensor after Pyramid Pooling Module. 
+ """ + + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + + return nn.Sequential(prior, conv) + + def forward(self, input: paddle.Tensor) -> paddle.Tensor: + cat_layers = [] + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + paddle.shape(input)[2:], + mode='bilinear', + align_corners=self.align_corners) + cat_layers.append(x) + cat_layers = [input] + cat_layers[::-1] + cat = paddle.concat(cat_layers, axis=1) + out = self.conv_bn_relu2(cat) + + return out diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/module.py b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/module.py new file mode 100644 index 000000000..8657af0d8 --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/module.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from pspnet_resnet50_cityscapes.resnet import ResNet50_vd +import pspnet_resnet50_cityscapes.layers as layers + +@moduleinfo( + name="pspnet_resnet50_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="PSPNetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class PSPNet(nn.Layer): + """ + The PSPNet implementation based on PaddlePaddle. + + The original article refers to + Zhao, Hengshuang, et al. "Pyramid scene parsing network" + (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. + pp_out_channels (int, optional): The output channels after Pyramid Pooling Module. Default: 1024. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1,2,3,6). + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (2, 3), + pp_out_channels: int = 1024, + bin_sizes: Tuple[int] = (1, 2, 3, 6), + enable_auxiliary_loss: bool = True, + align_corners: bool = False, + pretrained: str = None): + super(PSPNet, self).__init__() + + self.backbone = ResNet50_vd() + backbone_channels = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + + self.head = PSPNetHead(num_classes, backbone_indices, backbone_channels, + pp_out_channels, bin_sizes, + enable_auxiliary_loss, align_corners) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class PSPNetHead(nn.Layer): + """ + The PSPNetHead implementation. + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. + The first index will be taken as a deep-supervision feature in auxiliary layer; + the second one will be taken as input of Pyramid Pooling Module (PPModule). + Usually backbone consists of four downsampling stage, and return an output of + each stage. If we set it as (2, 3) in ResNet, that means taking feature map of the third + stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule. + backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. + pp_out_channels (int): The output channels after Pyramid Pooling Module. + bin_sizes (tuple): The out size of pooled feature maps. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 
+ """ + + def __init__(self, num_classes, backbone_indices, backbone_channels, + pp_out_channels, bin_sizes, enable_auxiliary_loss, + align_corners): + + super().__init__() + + self.backbone_indices = backbone_indices + + self.psp_module = layers.PPModule( + in_channels=backbone_channels[1], + out_channels=pp_out_channels, + bin_sizes=bin_sizes, + dim_reduction=True, + align_corners=align_corners) + + self.dropout = nn.Dropout(p=0.1) # dropout_prob + + self.conv = nn.Conv2D( + in_channels=pp_out_channels, + out_channels=num_classes, + kernel_size=1) + + if enable_auxiliary_loss: + self.auxlayer = layers.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) + + self.enable_auxiliary_loss = enable_auxiliary_loss + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[1]] + x = self.psp_module(x) + x = self.dropout(x) + logit = self.conv(x) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_feat = feat_list[self.backbone_indices[0]] + auxiliary_logit = self.auxlayer(auxiliary_feat) + logit_list.append(auxiliary_logit) + + return logit_list diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/resnet.py b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/resnet.py new file mode 100644 index 000000000..f70720eec --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_cityscapes/resnet.py @@ -0,0 +1,357 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.nn as nn +import pspnet_resnet50_cityscapes.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + 
self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. + + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_voc/README.md b/modules/image/semantic_segmentation/pspnet_resnet50_voc/README.md new file mode 100644 index 000000000..97e4c156d --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_voc/README.md @@ -0,0 +1,182 @@ +# pspnet_resnet50_voc + +|模型名称|pspnet_resnet50_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|pspnet_resnet50vd| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| +|模型大小|390MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[pspnet](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install pspnet_resnet50_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用pspnet_resnet50_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='pspnet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m pspnet_resnet50_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + 
+
+ def cv2_to_base64(image):
+     data = cv2.imencode('.jpg', image)[1]
+     return base64.b64encode(data.tostring()).decode('utf8')
+
+ def base64_to_cv2(b64str):
+     data = base64.b64decode(b64str.encode('utf8'))
+     data = np.fromstring(data, np.uint8)
+     data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+     return data
+
+ # 发送HTTP请求
+ org_im = cv2.imread('/PATH/TO/IMAGE')
+ data = {'images':[cv2_to_base64(org_im)]}
+ headers = {"Content-type": "application/json"}
+ url = "http://127.0.0.1:8866/predict/pspnet_resnet50_voc"
+ r = requests.post(url=url, headers=headers, data=json.dumps(data))
+ mask = base64_to_cv2(r.json()["results"][0])
+ ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_voc/README_en.md b/modules/image/semantic_segmentation/pspnet_resnet50_voc/README_en.md
new file mode 100644
index 000000000..27b1489c9
--- /dev/null
+++ b/modules/image/semantic_segmentation/pspnet_resnet50_voc/README_en.md
@@ -0,0 +1,181 @@
+# pspnet_resnet50_voc
+
+|Module Name|pspnet_resnet50_voc|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|pspnet_resnet50vd|
+|Dataset|PascalVOC2012|
+|Fine-tuning supported or not|Yes|
+|Module Size|370MB|
+|Data indicators|-|
+|Latest update date|2022-03-22|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [pspnet](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install pspnet_resnet50_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the pspnet_resnet50_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='pspnet_resnet50_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='pspnet_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m pspnet_resnet50_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pspnet_resnet50_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_voc/layers.py b/modules/image/semantic_segmentation/pspnet_resnet50_voc/layers.py new file mode 100644 index 000000000..a40f65856 --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_voc/layers.py @@ -0,0 +1,353 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
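+
+# NOTE: shared building blocks for the segmentation modules in this change: a
+# SyncBatchNorm helper that falls back to nn.BatchNorm2D on CPU, ConvBN / ConvBNReLU /
+# SeparableConvBNReLU wrappers, an Activation lookup layer, and the ASPPModule,
+# AuxLayer and PPModule blocks. module.py in this directory uses PPModule and AuxLayer.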
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name: str = None): + return paddle.add(x, y, name) + +class PPModule(nn.Layer): + """ + Pyramid pooling module originally in PSPNet. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 2, 3, 6). + dim_reduction (bool, optional): A bool value represents if reducing dimension after pooling. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, in_channels: int, out_channels: int, bin_sizes: tuple, dim_reduction: bool, + align_corners: bool): + super().__init__() + + self.bin_sizes = bin_sizes + + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + + # we use dimension reduction after pooling mentioned in original implementation. + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_bn_relu2 = ConvBNReLU( + in_channels=in_channels + inter_channels * len(bin_sizes), + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels: int, out_channels: int, size: int): + """ + Create one pooling layer. + + In our implementation, we adopt the same dimension reduction as the original paper that might be + slightly different with other implementations. + + After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations + keep the channels to be same. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels to pyramid pooling module. + size (int): The out size of the pooled layer. + + Returns: + conv (Tensor): A tensor after Pyramid Pooling Module. 
+ """ + + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + + return nn.Sequential(prior, conv) + + def forward(self, input: paddle.Tensor) -> paddle.Tensor: + cat_layers = [] + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + paddle.shape(input)[2:], + mode='bilinear', + align_corners=self.align_corners) + cat_layers.append(x) + cat_layers = [input] + cat_layers[::-1] + cat = paddle.concat(cat_layers, axis=1) + out = self.conv_bn_relu2(cat) + + return out diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_voc/module.py b/modules/image/semantic_segmentation/pspnet_resnet50_voc/module.py new file mode 100644 index 000000000..417b0d338 --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_voc/module.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from pspnet_resnet50_voc.resnet import ResNet50_vd +import pspnet_resnet50_voc.layers as layers + +@moduleinfo( + name="pspnet_resnet50_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="PSPNetResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class PSPNet(nn.Layer): + """ + The PSPNet implementation based on PaddlePaddle. + + The original article refers to + Zhao, Hengshuang, et al. "Pyramid scene parsing network" + (https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone. + pp_out_channels (int, optional): The output channels after Pyramid Pooling Module. Default: 1024. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1,2,3,6). + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
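+
+    Examples:
+        A minimal inference sketch (for illustration only; the image path is a placeholder):
+
+            import cv2
+            import paddlehub as hub
+
+            model = hub.Module(name='pspnet_resnet50_voc')
+            img = cv2.imread('/PATH/TO/IMAGE')
+            results = model.predict(images=[img], visualization=False)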
+ """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (2, 3), + pp_out_channels: int = 1024, + bin_sizes: Tuple[int] = (1, 2, 3, 6), + enable_auxiliary_loss: bool = True, + align_corners: bool = False, + pretrained: str = None): + super(PSPNet, self).__init__() + + self.backbone = ResNet50_vd() + backbone_channels = [ + self.backbone.feat_channels[i] for i in backbone_indices + ] + + self.head = PSPNetHead(num_classes, backbone_indices, backbone_channels, + pp_out_channels, bin_sizes, + enable_auxiliary_loss, align_corners) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate( + logit, + paddle.shape(x)[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + + +class PSPNetHead(nn.Layer): + """ + The PSPNetHead implementation. + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. + The first index will be taken as a deep-supervision feature in auxiliary layer; + the second one will be taken as input of Pyramid Pooling Module (PPModule). + Usually backbone consists of four downsampling stage, and return an output of + each stage. If we set it as (2, 3) in ResNet, that means taking feature map of the third + stage (res4b22) in backbone, and feature map of the fourth stage (res5c) as input of PPModule. + backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. + pp_out_channels (int): The output channels after Pyramid Pooling Module. + bin_sizes (tuple): The out size of pooled feature maps. + enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 
+ """ + + def __init__(self, num_classes, backbone_indices, backbone_channels, + pp_out_channels, bin_sizes, enable_auxiliary_loss, + align_corners): + + super().__init__() + + self.backbone_indices = backbone_indices + + self.psp_module = layers.PPModule( + in_channels=backbone_channels[1], + out_channels=pp_out_channels, + bin_sizes=bin_sizes, + dim_reduction=True, + align_corners=align_corners) + + self.dropout = nn.Dropout(p=0.1) # dropout_prob + + self.conv = nn.Conv2D( + in_channels=pp_out_channels, + out_channels=num_classes, + kernel_size=1) + + if enable_auxiliary_loss: + self.auxlayer = layers.AuxLayer( + in_channels=backbone_channels[0], + inter_channels=backbone_channels[0] // 4, + out_channels=num_classes) + + self.enable_auxiliary_loss = enable_auxiliary_loss + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[1]] + x = self.psp_module(x) + x = self.dropout(x) + logit = self.conv(x) + logit_list.append(logit) + + if self.enable_auxiliary_loss: + auxiliary_feat = feat_list[self.backbone_indices[0]] + auxiliary_logit = self.auxlayer(auxiliary_feat) + logit_list.append(auxiliary_logit) + + return logit_list diff --git a/modules/image/semantic_segmentation/pspnet_resnet50_voc/resnet.py b/modules/image/semantic_segmentation/pspnet_resnet50_voc/resnet.py new file mode 100644 index 000000000..71af88390 --- /dev/null +++ b/modules/image/semantic_segmentation/pspnet_resnet50_voc/resnet.py @@ -0,0 +1,357 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
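+
+# NOTE: the annotations in this file reference paddle.Tensor, Tuple and List, so the
+# imports below are needed for it to import cleanly; they are added here because they
+# appear to be missing from this patch.
+from typing import List, Tuple
+
+import paddle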
+ +import paddle.nn as nn +import pspnet_resnet50_voc.layers as layers + + +class ConvBNLayer(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + data_format: str = 'NCHW'): + super(ConvBNLayer, self).__init__() + if dilation != 1 and kernel_size != 3: + raise RuntimeError("When the dilation isn't 1," \ + "the kernel_size should be 3.") + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = nn.AvgPool2D( + kernel_size=2, + stride=2, + padding=0, + ceil_mode=True, + data_format=data_format) + self._conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 \ + if dilation == 1 else dilation, + dilation=dilation, + groups=groups, + bias_attr=False, + data_format=data_format) + + self._batch_norm = layers.SyncBatchNorm( + out_channels, data_format=data_format) + self._act_op = layers.Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + data_format: str = 'NCHW'): + super(BottleneckBlock, self).__init__() + + self.data_format = data_format + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act='relu', + data_format=data_format) + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + data_format=data_format) + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + # NOTE: Use the wrap layer for quantization training + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = self.add(short, conv2) + y = self.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + dilation: int = 1, + shortcut: bool = True, + if_first: bool = False, + data_format: str = 'NCHW'): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + act='relu', + data_format=data_format) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + dilation=dilation, + act=None, + data_format=data_format) + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + data_format=data_format) + + self.shortcut = shortcut + 
self.dilation = dilation + self.data_format = data_format + self.add = layers.Add() + self.relu = layers.Activation(act="relu") + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = self.add(short, conv1) + y = self.relu(y) + + return y + + +class ResNet_vd(nn.Layer): + """ + The ResNet_vd implementation based on PaddlePaddle. + + The original article refers to Jingdong + Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks" + (https://arxiv.org/pdf/1812.01187.pdf). + + Args: + layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50. + output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8. + multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1). + pretrained (str, optional): The path of pretrained model. + + """ + + def __init__(self, + layers: int = 50, + output_stride: int = 8, + multi_grid: Tuple[int] = (1, 1, 1), + pretrained: str = None, + data_format: str = 'NCHW'): + super(ResNet_vd, self).__init__() + + self.data_format = data_format + self.conv1_logit = None # for gscnn shape stream + self.layers = layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format( + supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_channels = [64, 256, 512, 1024 + ] if layers >= 50 else [64, 64, 128, 256] + num_filters = [64, 128, 256, 512] + + # for channels of four returned stages + self.feat_channels = [c * 4 for c in num_filters + ] if layers >= 50 else num_filters + + dilation_dict = None + if output_stride == 8: + dilation_dict = {2: 2, 3: 4} + elif output_stride == 16: + dilation_dict = {3: 2} + + self.conv1_1 = ConvBNLayer( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + act='relu', + data_format=data_format) + self.conv1_2 = ConvBNLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.conv1_3 = ConvBNLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + act='relu', + data_format=data_format) + self.pool2d_max = nn.MaxPool2D( + kernel_size=3, stride=2, padding=1, data_format=data_format) + + # self.block_list = [] + self.stage_list = [] + if layers >= 50: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + + ############################################################################### + # Add dilation rate for some segmentation tasks, if dilation_dict is not None. 
+ dilation_rate = dilation_dict[ + block] if dilation_dict and block in dilation_dict else 1 + + # Actually block here is 'stage', and i is 'block' in 'stage' + # At the stage 4, expand the the dilation_rate if given multi_grid + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + ############################################################################### + + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 + and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + dilation=dilation_rate, + data_format=data_format)) + + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + else: + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + dilation_rate = dilation_dict[block] \ + if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + + basic_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BasicBlock( + in_channels=num_channels[block] + if i == 0 else num_filters[block], + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 \ + and dilation_rate == 1 else 1, + dilation=dilation_rate, + shortcut=shortcut, + if_first=block == i == 0, + data_format=data_format)) + block_list.append(basic_block) + shortcut = True + self.stage_list.append(block_list) + + self.pretrained = pretrained + + def forward(self, inputs: paddle.Tensor) -> List[paddle.Tensor]: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + self.conv1_logit = y.clone() + y = self.pool2d_max(y) + + # A feature list saves the output feature map of each stage. + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + + return feat_list + + +def ResNet50_vd(**args): + model = ResNet_vd(layers=50, **args) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README.md b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README.md new file mode 100644 index 000000000..c2d0cbb3e --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README.md @@ -0,0 +1,182 @@ +# stdc1_seg_cityscapes + +|模型名称|stdc1_seg_cityscapes| +| :--- | :---: | +|类别|图像-图像分割| +|网络|stdc1_seg| +|数据集|Cityscapes| +|是否支持Fine-tuning|是| +|模型大小|67MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[stdc](https://arxiv.org/abs/2104.13188) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install stdc1_seg_cityscapes + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用stdc1_seg_cityscapes模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='stdc1_seg_cityscapes', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m stdc1_seg_cityscapes + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', 
image)[1]
+        return base64.b64encode(data.tostring()).decode('utf8')
+
+    def base64_to_cv2(b64str):
+        data = base64.b64decode(b64str.encode('utf8'))
+        data = np.fromstring(data, np.uint8)
+        data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        return data
+
+    # 发送HTTP请求
+    org_im = cv2.imread('/PATH/TO/IMAGE')
+    data = {'images':[cv2_to_base64(org_im)]}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/stdc1_seg_cityscapes"
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    mask = base64_to_cv2(r.json()["results"][0])
+    ```
+
+## 五、更新历史
+
+* 1.0.0
+
+  初始发布
diff --git a/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README_en.md b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README_en.md
new file mode 100644
index 000000000..3198989fc
--- /dev/null
+++ b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/README_en.md
@@ -0,0 +1,181 @@
+# stdc1_seg_cityscapes
+
+|Module Name|stdc1_seg_cityscapes|
+| :--- | :---: |
+|Category|Image Segmentation|
+|Network|stdc1_seg|
+|Dataset|Cityscapes|
+|Fine-tuning supported or not|Yes|
+|Module Size|67MB|
+|Data indicators|-|
+|Latest update date|2022-03-21|
+
+## I. Basic Information
+
+- ### Application Effect Display
+  - Sample results:
+

+ +

+
+- ### Module Introduction
+
+  - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction.
+  - For more information, please refer to: [stdc](https://arxiv.org/abs/2104.13188)
+
+## II. Installation
+
+- ### 1、Environmental Dependence
+
+  - paddlepaddle >= 2.0.0
+
+  - paddlehub >= 2.0.0
+
+- ### 2、Installation
+
+  - ```shell
+    $ hub install stdc1_seg_cityscapes
+    ```
+
+  - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md)
+    | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md)
+
+
+## III. Module API Prediction
+
+- ### 1、Prediction Code Example
+
+  - ```python
+    import cv2
+    import paddle
+    import paddlehub as hub
+
+    if __name__ == '__main__':
+        model = hub.Module(name='stdc1_seg_cityscapes')
+        img = cv2.imread("/PATH/TO/IMAGE")
+        result = model.predict(images=[img], visualization=True)
+    ```
+
+- ### 2.Fine-tune and Encapsulation
+
+  - After completing the installation of PaddlePaddle and PaddleHub, you can start using the stdc1_seg_cityscapes model to fine-tune datasets such as OpticDiscSeg.
+
+  - Steps:
+
+    - Step1: Define the data preprocessing method
+
+      - ```python
+        from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize
+
+        transform = Compose([Resize(target_size=(512, 512)), Normalize()])
+        ```
+
+      - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs.
+
+    - Step2: Download the dataset
+
+      - ```python
+        from paddlehub.datasets import OpticDiscSeg
+
+        train_reader = OpticDiscSeg(transform, mode='train')
+        ```
+        * `transforms`: data preprocessing methods.
+
+        * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`.
+
+        * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py). `hub.datasets.OpticDiscSeg()` automatically downloads the dataset and decompresses it to the `$HOME/.paddlehub/dataset` directory under the user directory.
+
+    - Step3: Load the pre-trained model
+
+      - ```python
+        import paddlehub as hub
+
+        model = hub.Module(name='stdc1_seg_cityscapes', num_classes=2, pretrained=None)
+        ```
+        - `name`: model name.
+        - `pretrained`: Path of the self-trained checkpoint to load; if it is None, the parameters provided with the module are loaded.
+
+    - Step4: Optimization strategy
+
+      - ```python
+        import paddle
+        from paddlehub.finetune.trainer import Trainer
+
+        scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
+        optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
+        trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True)
+        trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4)
+        ```
+
+  - Model prediction
+
+    - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions.
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m stdc1_seg_cityscapes + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stdc1_seg_cityscapes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/stdc1_seg_cityscapes/layers.py b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/layers.py new file mode 100644 index 000000000..c65193f55 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/layers.py @@ -0,0 +1,357 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
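+
+# NOTE: this file mirrors the layers.py shipped with the other segmentation modules
+# in this change; the STDC-Seg head in module.py uses ConvBNReLU, and stdcnet.py
+# uses the SyncBatchNorm helper (which falls back to nn.BatchNorm2D on CPU).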
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name=None) -> paddle.Tensor: + return paddle.add(x, y, name) + +class PPModule(nn.Layer): + """ + Pyramid pooling module originally in PSPNet. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 2, 3, 6). + dim_reduction (bool, optional): A bool value represents if reducing dimension after pooling. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + bin_sizes: tuple, + dim_reduction: bool, + align_corners: bool): + super().__init__() + + self.bin_sizes = bin_sizes + + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + + # we use dimension reduction after pooling mentioned in original implementation. + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_bn_relu2 = ConvBNReLU( + in_channels=in_channels + inter_channels * len(bin_sizes), + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels: int, out_channels: int, size: int): + """ + Create one pooling layer. + + In our implementation, we adopt the same dimension reduction as the original paper that might be + slightly different with other implementations. + + After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations + keep the channels to be same. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels to pyramid pooling module. + size (int): The out size of the pooled layer. + + Returns: + conv (Tensor): A tensor after Pyramid Pooling Module. 
+ """ + + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + + return nn.Sequential(prior, conv) + + def forward(self, input: paddle.Tensor) -> paddle.Tensor: + cat_layers = [] + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + paddle.shape(input)[2:], + mode='bilinear', + align_corners=self.align_corners) + cat_layers.append(x) + cat_layers = [input] + cat_layers[::-1] + cat = paddle.concat(cat_layers, axis=1) + out = self.conv_bn_relu2(cat) + + return out diff --git a/modules/image/semantic_segmentation/stdc1_seg_cityscapes/module.py b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/module.py new file mode 100644 index 000000000..f942f225a --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/module.py @@ -0,0 +1,235 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from stdc1_seg_cityscapes.stdcnet import STDC1 +import stdc1_seg_cityscapes.layers as layers + + +@moduleinfo( + name="stdc1_seg_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="STDCSeg is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class STDCSeg(nn.Layer): + """ + The STDCSeg implementation based on PaddlePaddle. + + The original article refers to Meituan + Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation." + (https://arxiv.org/abs/2104.13188) + + Args: + num_classes(int,optional): The unique number of target classes. + use_boundary_8(bool,non-optional): Whether to use detail loss. it should be True accroding to paper for best metric. Default: True. + Actually,if you want to use _boundary_2/_boundary_4/_boundary_16,you should append loss function number of DetailAggregateLoss.It should work properly. + use_conv_last(bool,optional): Determine ContextPath 's inplanes variable according to whether to use bockbone's last conv. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
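+
+    Examples:
+        A minimal inference sketch (for illustration only; the image path is a placeholder):
+
+            import cv2
+            import paddlehub as hub
+
+            model = hub.Module(name='stdc1_seg_cityscapes')
+            img = cv2.imread('/PATH/TO/IMAGE')
+            masks = model.predict(images=[img], visualization=True, save_path='seg_result')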
+ """ + + def __init__(self, + num_classes: int = 19, + use_boundary_2: bool = False, + use_boundary_4: bool = False, + use_boundary_8: bool = True, + use_boundary_16: bool = False, + use_conv_last: bool = False, + pretrained: str = None): + super(STDCSeg, self).__init__() + + self.use_boundary_2 = use_boundary_2 + self.use_boundary_4 = use_boundary_4 + self.use_boundary_8 = use_boundary_8 + self.use_boundary_16 = use_boundary_16 + self.cp = ContextPath(STDC1(), use_conv_last=use_conv_last) + self.ffm = FeatureFusionModule(384, 256) + self.conv_out = SegHead(256, 256, num_classes) + self.conv_out8 = SegHead(128, 64, num_classes) + self.conv_out16 = SegHead(128, 64, num_classes) + self.conv_out_sp16 = SegHead(512, 64, 1) + self.conv_out_sp8 = SegHead(256, 64, 1) + self.conv_out_sp4 = SegHead(64, 64, 1) + self.conv_out_sp2 = SegHead(32, 64, 1) + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + x_hw = paddle.shape(x)[2:] + feat_res2, feat_res4, feat_res8, _, feat_cp8, feat_cp16 = self.cp(x) + + logit_list = [] + if self.training: + feat_fuse = self.ffm(feat_res8, feat_cp8) + feat_out = self.conv_out(feat_fuse) + feat_out8 = self.conv_out8(feat_cp8) + feat_out16 = self.conv_out16(feat_cp16) + + logit_list = [feat_out, feat_out8, feat_out16] + logit_list = [ + F.interpolate(x, x_hw, mode='bilinear', align_corners=True) + for x in logit_list + ] + + if self.use_boundary_2: + feat_out_sp2 = self.conv_out_sp2(feat_res2) + logit_list.append(feat_out_sp2) + if self.use_boundary_4: + feat_out_sp4 = self.conv_out_sp4(feat_res4) + logit_list.append(feat_out_sp4) + if self.use_boundary_8: + feat_out_sp8 = self.conv_out_sp8(feat_res8) + logit_list.append(feat_out_sp8) + else: + feat_fuse = self.ffm(feat_res8, feat_cp8) + feat_out = self.conv_out(feat_fuse) + feat_out = F.interpolate( + feat_out, x_hw, mode='bilinear', align_corners=True) + logit_list = [feat_out] + + return logit_list + + +class SegHead(nn.Layer): + def __init__(self, in_chan: int, mid_chan: int, n_classes:int): + super(SegHead, self).__init__() + self.conv = layers.ConvBNReLU( + in_chan, mid_chan, kernel_size=3, stride=1, padding=1) + self.conv_out = nn.Conv2D( + mid_chan, n_classes, kernel_size=1, bias_attr=None) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = self.conv_out(x) + return x + + +class AttentionRefinementModule(nn.Layer): + def __init__(self, in_chan: int, out_chan: int): + super(AttentionRefinementModule, self).__init__() + self.conv = layers.ConvBNReLU( + in_chan, out_chan, kernel_size=3, stride=1, padding=1) + self.conv_atten = nn.Conv2D( + out_chan, out_chan, kernel_size=1, bias_attr=None) + self.bn_atten = nn.BatchNorm2D(out_chan) + self.sigmoid_atten = nn.Sigmoid() + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat = self.conv(x) + atten = F.adaptive_avg_pool2d(feat, 1) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) + out = paddle.multiply(feat, atten) + return out + + +class ContextPath(nn.Layer): + def __init__(self, backbone, 
use_conv_last: bool = False): + super(ContextPath, self).__init__() + self.backbone = backbone + self.arm16 = AttentionRefinementModule(512, 128) + inplanes = 1024 + if use_conv_last: + inplanes = 1024 + self.arm32 = AttentionRefinementModule(inplanes, 128) + self.conv_head32 = layers.ConvBNReLU( + 128, 128, kernel_size=3, stride=1, padding=1) + self.conv_head16 = layers.ConvBNReLU( + 128, 128, kernel_size=3, stride=1, padding=1) + self.conv_avg = layers.ConvBNReLU( + inplanes, 128, kernel_size=1, stride=1, padding=0) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat2, feat4, feat8, feat16, feat32 = self.backbone(x) + + feat8_hw = paddle.shape(feat8)[2:] + feat16_hw = paddle.shape(feat16)[2:] + feat32_hw = paddle.shape(feat32)[2:] + + avg = F.adaptive_avg_pool2d(feat32, 1) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, feat32_hw, mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, feat16_hw, mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, feat8_hw, mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16 + + +class FeatureFusionModule(nn.Layer): + def __init__(self, in_chan:int , out_chan: int): + super(FeatureFusionModule, self).__init__() + self.convblk = layers.ConvBNReLU( + in_chan, out_chan, kernel_size=1, stride=1, padding=0) + self.conv1 = nn.Conv2D( + out_chan, + out_chan // 4, + kernel_size=1, + stride=1, + padding=0, + bias_attr=None) + self.conv2 = nn.Conv2D( + out_chan // 4, + out_chan, + kernel_size=1, + stride=1, + padding=0, + bias_attr=None) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + + def forward(self, fsp: paddle.Tensor, fcp: paddle.Tensor) -> paddle.Tensor: + fcat = paddle.concat([fsp, fcp], axis=1) + feat = self.convblk(fcat) + atten = F.adaptive_avg_pool2d(feat, 1) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = paddle.multiply(feat, atten) + feat_out = feat_atten + feat + return feat_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/stdc1_seg_cityscapes/stdcnet.py b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/stdcnet.py new file mode 100644 index 000000000..ddf0f0431 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_cityscapes/stdcnet.py @@ -0,0 +1,263 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union, List, Tuple +import math + +import paddle +import paddle.nn as nn + +import stdc1_seg_cityscapes.layers as L + +__all__ = ["STDC1", "STDC2"] + + +class STDCNet(nn.Layer): + """ + The STDCNet implementation based on PaddlePaddle. + + The original article refers to Meituan + Fan, Mingyuan, et al. 
"Rethinking BiSeNet For Real-time Semantic Segmentation." + (https://arxiv.org/abs/2104.13188) + + Args: + base(int, optional): base channels. Default: 64. + layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3]. + block_num(int,optional): block_num of features block. Default: 4. + type(str,optional): feature fusion method "cat"/"add". Default: "cat". + num_classes(int, optional): class number for image classification. Default: 1000. + dropout(float,optional): dropout ratio. if >0,use dropout ratio. Default: 0.20. + use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False. + pretrained(str, optional): the path of pretrained model. + """ + + def __init__(self, + base: int = 64, + layers: List[int] = [4, 5, 3], + block_num: int = 4, + type: str = "cat", + num_classes: int = 1000, + dropout: float = 0.20, + use_conv_last: bool = False): + super(STDCNet, self).__init__() + if type == "cat": + block = CatBottleneck + elif type == "add": + block = AddBottleneck + self.use_conv_last = use_conv_last + self.features = self._make_layers(base, layers, block_num, block) + self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1) + + if (layers == [4, 5, 3]): #stdc1446 + self.x2 = nn.Sequential(self.features[:1]) + self.x4 = nn.Sequential(self.features[1:2]) + self.x8 = nn.Sequential(self.features[2:6]) + self.x16 = nn.Sequential(self.features[6:11]) + self.x32 = nn.Sequential(self.features[11:]) + elif (layers == [2, 2, 2]): #stdc813 + self.x2 = nn.Sequential(self.features[:1]) + self.x4 = nn.Sequential(self.features[1:2]) + self.x8 = nn.Sequential(self.features[2:4]) + self.x16 = nn.Sequential(self.features[4:6]) + self.x32 = nn.Sequential(self.features[6:]) + else: + raise NotImplementedError( + "model with layers:{} is not implemented!".format(layers)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + """ + forward function for feature extract. + """ + feat2 = self.x2(x) + feat4 = self.x4(feat2) + feat8 = self.x8(feat4) + feat16 = self.x16(feat8) + feat32 = self.x32(feat16) + if self.use_conv_last: + feat32 = self.conv_last(feat32) + return feat2, feat4, feat8, feat16, feat32 + + def _make_layers(self, base, layers, block_num, block): + features = [] + features += [ConvBNRelu(3, base // 2, 3, 2)] + features += [ConvBNRelu(base // 2, base, 3, 2)] + + for i, layer in enumerate(layers): + for j in range(layer): + if i == 0 and j == 0: + features.append(block(base, base * 4, block_num, 2)) + elif j == 0: + features.append( + block(base * int(math.pow(2, i + 1)), + base * int(math.pow(2, i + 2)), block_num, 2)) + else: + features.append( + block(base * int(math.pow(2, i + 2)), + base * int(math.pow(2, i + 2)), block_num, 1)) + + return nn.Sequential(*features) + + +class ConvBNRelu(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, kernel: int = 3, stride: int = 1): + super(ConvBNRelu, self).__init__() + self.conv = nn.Conv2D( + in_planes, + out_planes, + kernel_size=kernel, + stride=stride, + padding=kernel // 2, + bias_attr=False) + self.bn = L.SyncBatchNorm(out_planes, data_format='NCHW') + self.relu = nn.ReLU() + + def forward(self, x): + out = self.relu(self.bn(self.conv(x))) + return out + + +class AddBottleneck(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, block_num: int = 3, stride: int = 1): + super(AddBottleneck, self).__init__() + assert block_num > 1, "block number should be larger than 1." 
+ self.conv_list = nn.LayerList() + self.stride = stride + if stride == 2: + self.avd_layer = nn.Sequential( + nn.Conv2D( + out_planes // 2, + out_planes // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_planes // 2, + bias_attr=False), + nn.BatchNorm2D(out_planes // 2), + ) + self.skip = nn.Sequential( + nn.Conv2D( + in_planes, + in_planes, + kernel_size=3, + stride=2, + padding=1, + groups=in_planes, + bias_attr=False), + nn.BatchNorm2D(in_planes), + nn.Conv2D( + in_planes, out_planes, kernel_size=1, bias_attr=False), + nn.BatchNorm2D(out_planes), + ) + stride = 1 + + for idx in range(block_num): + if idx == 0: + self.conv_list.append( + ConvBNRelu(in_planes, out_planes // 2, kernel=1)) + elif idx == 1 and block_num == 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride)) + elif idx == 1 and block_num > 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride)) + elif idx < block_num - 1: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)))) + else: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx)))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out_list = [] + out = x + for idx, conv in enumerate(self.conv_list): + if idx == 0 and self.stride == 2: + out = self.avd_layer(conv(out)) + else: + out = conv(out) + out_list.append(out) + if self.stride == 2: + x = self.skip(x) + return paddle.concat(out_list, axis=1) + x + + +class CatBottleneck(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, block_num: int = 3, stride: int = 1): + super(CatBottleneck, self).__init__() + assert block_num > 1, "block number should be larger than 1." 
+ self.conv_list = nn.LayerList() + self.stride = stride + if stride == 2: + self.avd_layer = nn.Sequential( + nn.Conv2D( + out_planes // 2, + out_planes // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_planes // 2, + bias_attr=False), + nn.BatchNorm2D(out_planes // 2), + ) + self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1) + stride = 1 + + for idx in range(block_num): + if idx == 0: + self.conv_list.append( + ConvBNRelu(in_planes, out_planes // 2, kernel=1)) + elif idx == 1 and block_num == 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride)) + elif idx == 1 and block_num > 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride)) + elif idx < block_num - 1: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)))) + else: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx)))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out_list = [] + out1 = self.conv_list[0](x) + for idx, conv in enumerate(self.conv_list[1:]): + if idx == 0: + if self.stride == 2: + out = conv(self.avd_layer(out1)) + else: + out = conv(out1) + else: + out = conv(out) + out_list.append(out) + + if self.stride == 2: + out1 = self.skip(out1) + out_list.insert(0, out1) + out = paddle.concat(out_list, axis=1) + return out + + +def STDC2(**kwargs): + model = STDCNet(base=64, layers=[4, 5, 3], **kwargs) + return model + +def STDC1(**kwargs): + model = STDCNet(base=64, layers=[2, 2, 2], **kwargs) + return model \ No newline at end of file diff --git a/modules/image/semantic_segmentation/stdc1_seg_voc/README.md b/modules/image/semantic_segmentation/stdc1_seg_voc/README.md new file mode 100644 index 000000000..f24a2a813 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_voc/README.md @@ -0,0 +1,182 @@ +# stdc1_seg_voc + +|模型名称|stdc1_seg_voc| +| :--- | :---: | +|类别|图像-图像分割| +|网络|stdc1_seg| +|数据集|PascalVOC2012| +|是否支持Fine-tuning|是| +|模型大小|67MB| +|指标|-| +|最新更新日期|2022-03-21| + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[stdc](https://arxiv.org/abs/2104.13188) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install stdc1_seg_voc + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.预测代码示例 + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用stdc1_seg_voc模型对OpticDiscSeg数据集进行Fine-tune。 `train.py`内容如下: + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + - `transforms`: 数据预处理方式。 + - `mode`: `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + - 数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + import paddlehub as hub + + model = hub.Module(name='stdc1_seg_voc', num_classes=2, pretrained=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - 参数配置正确后,请执行脚本`python predict.py`。 + + - **Args** + * `images`:原始图像路径或BGR格式图片; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像分割服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m stdc1_seg_voc + ``` + + - 这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return 
base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stdc1_seg_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/stdc1_seg_voc/README_en.md b/modules/image/semantic_segmentation/stdc1_seg_voc/README_en.md new file mode 100644 index 000000000..fd11504b9 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_voc/README_en.md @@ -0,0 +1,181 @@ +# stdc1_seg_voc + +|Module Name|stdc1_seg_voc| +| :--- | :---: | +|Category|Image Segmentation| +|Network|stdc1_seg| +|Dataset|PascalVOC2012| +|Fine-tuning supported or not|Yes| +|Module Size|370MB| +|Data indicators|-| +|Latest update date|2022-03-22| + +## I. Basic Information + +- ### Application Effect Display + - Sample results: +

+ +

+ +- ### Module Introduction + + - We will show how to use PaddleHub to finetune the pre-trained model and complete the prediction. + - For more information, please refer to: [stdc](https://arxiv.org/abs/2104.13188) + +## II. Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、Installation + + - ```shell + $ hub install stdc1_seg_voc + ``` + + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) + | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III. Module API Prediction + +- ### 1、Prediction Code Example + + - ```python + import cv2 + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_voc') + img = cv2.imread("/PATH/TO/IMAGE") + result = model.predict(images=[img], visualization=True) + ``` + +- ### 2.Fine-tune and Encapsulation + + - After completing the installation of PaddlePaddle and PaddleHub, you can start using the stdc1_seg_voc model to fine-tune datasets such as OpticDiscSeg. + + - Steps: + + - Step1: Define the data preprocessing method + + - ```python + from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + + transform = Compose([Resize(target_size=(512, 512)), Normalize()]) + ``` + + - `segmentation_transforms`: The data enhancement module defines lots of data preprocessing methods. Users can replace the data preprocessing methods according to their needs. + + - Step2: Download the dataset + + - ```python + from paddlehub.datasets import OpticDiscSeg + + train_reader = OpticDiscSeg(transform, mode='train') + ``` + * `transforms`: data preprocessing methods. + + * `mode`: Select the data mode, the options are `train`, `test`, `val`. Default is `train`. + + * Dataset preparation can be referred to [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`will be automatically downloaded from the network and decompressed to the `$HOME/.paddlehub/dataset` directory under the user directory. + + - Step3: Load the pre-trained model + + - ```python + import paddlehub as hub + + model = hub.Module(name='stdc1_seg_voc', num_classes=2, pretrained=None) + ``` + - `name`: model name. + - `load_checkpoint`: Whether to load the self-trained model, if it is None, load the provided parameters. + + - Step4: Optimization strategy + + - ```python + import paddle + from paddlehub.finetune.trainer import Trainer + + scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) + optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True) + trainer.train(train_reader, epochs=10, batch_size=4, log_interval=10, save_interval=4) + ``` + + - Model prediction + + - When Fine-tune is completed, the model with the best performance on the verification set will be saved in the `${CHECKPOINT_DIR}/best_model` directory. We use this model to make predictions. 
The `predict.py` script is as follows: + + ```python + import paddle + import cv2 + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='stdc1_seg_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) + ``` + + - **Args** + * `images`: Image path or ndarray data with format [H, W, C], BGR. + * `visualization`: Whether to save the recognition results as picture files. + * `save_path`: Save path of the result, default is 'seg_result'. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online service of image segmentation. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + + - ```shell + $ hub serving start -m stdc1_seg_voc + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result: + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stdc1_seg_voc" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask = base64_to_cv2(r.json()["results"][0]) + ``` + +## V. Release Note + +- 1.0.0 + + First release diff --git a/modules/image/semantic_segmentation/stdc1_seg_voc/layers.py b/modules/image/semantic_segmentation/stdc1_seg_voc/layers.py new file mode 100644 index 000000000..230461030 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_voc/layers.py @@ -0,0 +1,357 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU( + in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: str = 'same', + **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D( + in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format( + act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU( + in_channels=out_channels * out_size, + out_channels=out_channels, + kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate( + y, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate( + img_avg, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. 
+ out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, + in_channels: int, + inter_channels: int, + out_channels: int, + dropout_prob: float = 0.1, + **kwargs): + super().__init__() + + self.conv_bn_relu = ConvBNReLU( + in_channels=in_channels, + out_channels=inter_channels, + kernel_size=3, + padding=1, + **kwargs) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D( + in_channels=inter_channels, + out_channels=out_channels, + kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Add(nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x: paddle.Tensor, y: paddle.Tensor, name=None) -> paddle.Tensor: + return paddle.add(x, y, name) + +class PPModule(nn.Layer): + """ + Pyramid pooling module originally in PSPNet. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 2, 3, 6). + dim_reduction (bool, optional): A bool value represents if reducing dimension after pooling. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, + in_channels: int, + out_channels: int, + bin_sizes: tuple, + dim_reduction: bool, + align_corners: bool): + super().__init__() + + self.bin_sizes = bin_sizes + + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + + # we use dimension reduction after pooling mentioned in original implementation. + self.stages = nn.LayerList([ + self._make_stage(in_channels, inter_channels, size) + for size in bin_sizes + ]) + + self.conv_bn_relu2 = ConvBNReLU( + in_channels=in_channels + inter_channels * len(bin_sizes), + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels: int, out_channels: int, size: int): + """ + Create one pooling layer. + + In our implementation, we adopt the same dimension reduction as the original paper that might be + slightly different with other implementations. + + After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations + keep the channels to be same. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels to pyramid pooling module. + size (int): The out size of the pooled layer. + + Returns: + conv (Tensor): A tensor after Pyramid Pooling Module. 
+ """ + + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = ConvBNReLU( + in_channels=in_channels, out_channels=out_channels, kernel_size=1) + + return nn.Sequential(prior, conv) + + def forward(self, input: paddle.Tensor) -> paddle.Tensor: + cat_layers = [] + for stage in self.stages: + x = stage(input) + x = F.interpolate( + x, + paddle.shape(input)[2:], + mode='bilinear', + align_corners=self.align_corners) + cat_layers.append(x) + cat_layers = [input] + cat_layers[::-1] + cat = paddle.concat(cat_layers, axis=1) + out = self.conv_bn_relu2(cat) + + return out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/stdc1_seg_voc/module.py b/modules/image/semantic_segmentation/stdc1_seg_voc/module.py new file mode 100644 index 000000000..642628dc9 --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_voc/module.py @@ -0,0 +1,235 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from stdc1_seg_voc.stdcnet import STDC1 +import stdc1_seg_voc.layers as layers + + +@moduleinfo( + name="stdc1_seg_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="STDCSeg is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class STDCSeg(nn.Layer): + """ + The STDCSeg implementation based on PaddlePaddle. + + The original article refers to Meituan + Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation." + (https://arxiv.org/abs/2104.13188) + + Args: + num_classes(int,optional): The unique number of target classes. + use_boundary_8(bool,non-optional): Whether to use detail loss. it should be True accroding to paper for best metric. Default: True. + Actually,if you want to use _boundary_2/_boundary_4/_boundary_16,you should append loss function number of DetailAggregateLoss.It should work properly. + use_conv_last(bool,optional): Determine ContextPath 's inplanes variable according to whether to use bockbone's last conv. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. 
+ """ + + def __init__(self, + num_classes: int = 19, + use_boundary_2: bool = False, + use_boundary_4: bool = False, + use_boundary_8: bool = True, + use_boundary_16: bool = False, + use_conv_last: bool = False, + pretrained: str = None): + super(STDCSeg, self).__init__() + + self.use_boundary_2 = use_boundary_2 + self.use_boundary_4 = use_boundary_4 + self.use_boundary_8 = use_boundary_8 + self.use_boundary_16 = use_boundary_16 + self.cp = ContextPath(STDC1(), use_conv_last=use_conv_last) + self.ffm = FeatureFusionModule(384, 256) + self.conv_out = SegHead(256, 256, num_classes) + self.conv_out8 = SegHead(128, 64, num_classes) + self.conv_out16 = SegHead(128, 64, num_classes) + self.conv_out_sp16 = SegHead(512, 64, 1) + self.conv_out_sp8 = SegHead(256, 64, 1) + self.conv_out_sp4 = SegHead(64, 64, 1) + self.conv_out_sp2 = SegHead(32, 64, 1) + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + x_hw = paddle.shape(x)[2:] + feat_res2, feat_res4, feat_res8, _, feat_cp8, feat_cp16 = self.cp(x) + + logit_list = [] + if self.training: + feat_fuse = self.ffm(feat_res8, feat_cp8) + feat_out = self.conv_out(feat_fuse) + feat_out8 = self.conv_out8(feat_cp8) + feat_out16 = self.conv_out16(feat_cp16) + + logit_list = [feat_out, feat_out8, feat_out16] + logit_list = [ + F.interpolate(x, x_hw, mode='bilinear', align_corners=True) + for x in logit_list + ] + + if self.use_boundary_2: + feat_out_sp2 = self.conv_out_sp2(feat_res2) + logit_list.append(feat_out_sp2) + if self.use_boundary_4: + feat_out_sp4 = self.conv_out_sp4(feat_res4) + logit_list.append(feat_out_sp4) + if self.use_boundary_8: + feat_out_sp8 = self.conv_out_sp8(feat_res8) + logit_list.append(feat_out_sp8) + else: + feat_fuse = self.ffm(feat_res8, feat_cp8) + feat_out = self.conv_out(feat_fuse) + feat_out = F.interpolate( + feat_out, x_hw, mode='bilinear', align_corners=True) + logit_list = [feat_out] + + return logit_list + + +class SegHead(nn.Layer): + def __init__(self, in_chan: int, mid_chan: int, n_classes:int): + super(SegHead, self).__init__() + self.conv = layers.ConvBNReLU( + in_chan, mid_chan, kernel_size=3, stride=1, padding=1) + self.conv_out = nn.Conv2D( + mid_chan, n_classes, kernel_size=1, bias_attr=None) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = self.conv_out(x) + return x + + +class AttentionRefinementModule(nn.Layer): + def __init__(self, in_chan: int, out_chan: int): + super(AttentionRefinementModule, self).__init__() + self.conv = layers.ConvBNReLU( + in_chan, out_chan, kernel_size=3, stride=1, padding=1) + self.conv_atten = nn.Conv2D( + out_chan, out_chan, kernel_size=1, bias_attr=None) + self.bn_atten = nn.BatchNorm2D(out_chan) + self.sigmoid_atten = nn.Sigmoid() + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat = self.conv(x) + atten = F.adaptive_avg_pool2d(feat, 1) + atten = self.conv_atten(atten) + atten = self.bn_atten(atten) + atten = self.sigmoid_atten(atten) + out = paddle.multiply(feat, atten) + return out + + +class ContextPath(nn.Layer): + def __init__(self, backbone, 
use_conv_last: bool = False): + super(ContextPath, self).__init__() + self.backbone = backbone + self.arm16 = AttentionRefinementModule(512, 128) + inplanes = 1024 + if use_conv_last: + inplanes = 1024 + self.arm32 = AttentionRefinementModule(inplanes, 128) + self.conv_head32 = layers.ConvBNReLU( + 128, 128, kernel_size=3, stride=1, padding=1) + self.conv_head16 = layers.ConvBNReLU( + 128, 128, kernel_size=3, stride=1, padding=1) + self.conv_avg = layers.ConvBNReLU( + inplanes, 128, kernel_size=1, stride=1, padding=0) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feat2, feat4, feat8, feat16, feat32 = self.backbone(x) + + feat8_hw = paddle.shape(feat8)[2:] + feat16_hw = paddle.shape(feat16)[2:] + feat32_hw = paddle.shape(feat32)[2:] + + avg = F.adaptive_avg_pool2d(feat32, 1) + avg = self.conv_avg(avg) + avg_up = F.interpolate(avg, feat32_hw, mode='nearest') + + feat32_arm = self.arm32(feat32) + feat32_sum = feat32_arm + avg_up + feat32_up = F.interpolate(feat32_sum, feat16_hw, mode='nearest') + feat32_up = self.conv_head32(feat32_up) + + feat16_arm = self.arm16(feat16) + feat16_sum = feat16_arm + feat32_up + feat16_up = F.interpolate(feat16_sum, feat8_hw, mode='nearest') + feat16_up = self.conv_head16(feat16_up) + + return feat2, feat4, feat8, feat16, feat16_up, feat32_up # x8, x16 + + +class FeatureFusionModule(nn.Layer): + def __init__(self, in_chan:int , out_chan: int): + super(FeatureFusionModule, self).__init__() + self.convblk = layers.ConvBNReLU( + in_chan, out_chan, kernel_size=1, stride=1, padding=0) + self.conv1 = nn.Conv2D( + out_chan, + out_chan // 4, + kernel_size=1, + stride=1, + padding=0, + bias_attr=None) + self.conv2 = nn.Conv2D( + out_chan // 4, + out_chan, + kernel_size=1, + stride=1, + padding=0, + bias_attr=None) + self.relu = nn.ReLU() + self.sigmoid = nn.Sigmoid() + + def forward(self, fsp: paddle.Tensor, fcp: paddle.Tensor) -> paddle.Tensor: + fcat = paddle.concat([fsp, fcp], axis=1) + feat = self.convblk(fcat) + atten = F.adaptive_avg_pool2d(feat, 1) + atten = self.conv1(atten) + atten = self.relu(atten) + atten = self.conv2(atten) + atten = self.sigmoid(atten) + feat_atten = paddle.multiply(feat, atten) + feat_out = feat_atten + feat + return feat_out \ No newline at end of file diff --git a/modules/image/semantic_segmentation/stdc1_seg_voc/stdcnet.py b/modules/image/semantic_segmentation/stdc1_seg_voc/stdcnet.py new file mode 100644 index 000000000..d2716a83b --- /dev/null +++ b/modules/image/semantic_segmentation/stdc1_seg_voc/stdcnet.py @@ -0,0 +1,263 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List +import math + +import paddle +import paddle.nn as nn + +import stdc1_seg_voc.layers as L + +__all__ = ["STDC1", "STDC2"] + + +class STDCNet(nn.Layer): + """ + The STDCNet implementation based on PaddlePaddle. + + The original article refers to Meituan + Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation."
+ (https://arxiv.org/abs/2104.13188) + + Args: + base(int, optional): base channels. Default: 64. + layers(list, optional): layers numbers list. It determines STDC block numbers of STDCNet's stage3\4\5. Defualt: [4, 5, 3]. + block_num(int,optional): block_num of features block. Default: 4. + type(str,optional): feature fusion method "cat"/"add". Default: "cat". + num_classes(int, optional): class number for image classification. Default: 1000. + dropout(float,optional): dropout ratio. if >0,use dropout ratio. Default: 0.20. + use_conv_last(bool,optional): whether to use the last ConvBNReLU layer . Default: False. + pretrained(str, optional): the path of pretrained model. + """ + + def __init__(self, + base: int = 64, + layers: List[int] = [4, 5, 3], + block_num: int = 4, + type: str = "cat", + num_classes: int = 1000, + dropout: float = 0.20, + use_conv_last: bool = False): + super(STDCNet, self).__init__() + if type == "cat": + block = CatBottleneck + elif type == "add": + block = AddBottleneck + self.use_conv_last = use_conv_last + self.features = self._make_layers(base, layers, block_num, block) + self.conv_last = ConvBNRelu(base * 16, max(1024, base * 16), 1, 1) + + if (layers == [4, 5, 3]): #stdc1446 + self.x2 = nn.Sequential(self.features[:1]) + self.x4 = nn.Sequential(self.features[1:2]) + self.x8 = nn.Sequential(self.features[2:6]) + self.x16 = nn.Sequential(self.features[6:11]) + self.x32 = nn.Sequential(self.features[11:]) + elif (layers == [2, 2, 2]): #stdc813 + self.x2 = nn.Sequential(self.features[:1]) + self.x4 = nn.Sequential(self.features[1:2]) + self.x8 = nn.Sequential(self.features[2:4]) + self.x16 = nn.Sequential(self.features[4:6]) + self.x32 = nn.Sequential(self.features[6:]) + else: + raise NotImplementedError( + "model with layers:{} is not implemented!".format(layers)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + """ + forward function for feature extract. + """ + feat2 = self.x2(x) + feat4 = self.x4(feat2) + feat8 = self.x8(feat4) + feat16 = self.x16(feat8) + feat32 = self.x32(feat16) + if self.use_conv_last: + feat32 = self.conv_last(feat32) + return feat2, feat4, feat8, feat16, feat32 + + def _make_layers(self, base, layers, block_num, block): + features = [] + features += [ConvBNRelu(3, base // 2, 3, 2)] + features += [ConvBNRelu(base // 2, base, 3, 2)] + + for i, layer in enumerate(layers): + for j in range(layer): + if i == 0 and j == 0: + features.append(block(base, base * 4, block_num, 2)) + elif j == 0: + features.append( + block(base * int(math.pow(2, i + 1)), + base * int(math.pow(2, i + 2)), block_num, 2)) + else: + features.append( + block(base * int(math.pow(2, i + 2)), + base * int(math.pow(2, i + 2)), block_num, 1)) + + return nn.Sequential(*features) + + +class ConvBNRelu(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, kernel: int = 3, stride: int = 1): + super(ConvBNRelu, self).__init__() + self.conv = nn.Conv2D( + in_planes, + out_planes, + kernel_size=kernel, + stride=stride, + padding=kernel // 2, + bias_attr=False) + self.bn = L.SyncBatchNorm(out_planes, data_format='NCHW') + self.relu = nn.ReLU() + + def forward(self, x): + out = self.relu(self.bn(self.conv(x))) + return out + + +class AddBottleneck(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, block_num: int = 3, stride: int = 1): + super(AddBottleneck, self).__init__() + assert block_num > 1, "block number should be larger than 1." 
+ self.conv_list = nn.LayerList() + self.stride = stride + if stride == 2: + self.avd_layer = nn.Sequential( + nn.Conv2D( + out_planes // 2, + out_planes // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_planes // 2, + bias_attr=False), + nn.BatchNorm2D(out_planes // 2), + ) + self.skip = nn.Sequential( + nn.Conv2D( + in_planes, + in_planes, + kernel_size=3, + stride=2, + padding=1, + groups=in_planes, + bias_attr=False), + nn.BatchNorm2D(in_planes), + nn.Conv2D( + in_planes, out_planes, kernel_size=1, bias_attr=False), + nn.BatchNorm2D(out_planes), + ) + stride = 1 + + for idx in range(block_num): + if idx == 0: + self.conv_list.append( + ConvBNRelu(in_planes, out_planes // 2, kernel=1)) + elif idx == 1 and block_num == 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride)) + elif idx == 1 and block_num > 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride)) + elif idx < block_num - 1: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)))) + else: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx)))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out_list = [] + out = x + for idx, conv in enumerate(self.conv_list): + if idx == 0 and self.stride == 2: + out = self.avd_layer(conv(out)) + else: + out = conv(out) + out_list.append(out) + if self.stride == 2: + x = self.skip(x) + return paddle.concat(out_list, axis=1) + x + + +class CatBottleneck(nn.Layer): + def __init__(self, in_planes: int, out_planes: int, block_num: int = 3, stride: int = 1): + super(CatBottleneck, self).__init__() + assert block_num > 1, "block number should be larger than 1." 
+ self.conv_list = nn.LayerList() + self.stride = stride + if stride == 2: + self.avd_layer = nn.Sequential( + nn.Conv2D( + out_planes // 2, + out_planes // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_planes // 2, + bias_attr=False), + nn.BatchNorm2D(out_planes // 2), + ) + self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1) + stride = 1 + + for idx in range(block_num): + if idx == 0: + self.conv_list.append( + ConvBNRelu(in_planes, out_planes // 2, kernel=1)) + elif idx == 1 and block_num == 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride)) + elif idx == 1 and block_num > 2: + self.conv_list.append( + ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride)) + elif idx < block_num - 1: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx + 1)))) + else: + self.conv_list.append( + ConvBNRelu(out_planes // int(math.pow(2, idx)), + out_planes // int(math.pow(2, idx)))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out_list = [] + out1 = self.conv_list[0](x) + for idx, conv in enumerate(self.conv_list[1:]): + if idx == 0: + if self.stride == 2: + out = conv(self.avd_layer(out1)) + else: + out = conv(out1) + else: + out = conv(out) + out_list.append(out) + + if self.stride == 2: + out1 = self.skip(out1) + out_list.insert(0, out1) + out = paddle.concat(out_list, axis=1) + return out + + +def STDC2(**kwargs): + model = STDCNet(base=64, layers=[4, 5, 3], **kwargs) + return model + +def STDC1(**kwargs): + model = STDCNet(base=64, layers=[2, 2, 2], **kwargs) + return model \ No newline at end of file From 0caac1d4b797c49013d5bea200e63d25358bbbea Mon Sep 17 00:00:00 2001 From: linjieccc <623543001@qq.com> Date: Wed, 30 Mar 2022 08:30:23 +0000 Subject: [PATCH 002/117] add albert pretrained models --- .../language_model/albert-base-v1/README.md | 4 +- .../language_model/albert-base-v2/README.md | 173 +++++++++++++++++ .../language_model/albert-base-v2/__init__.py | 0 .../language_model/albert-base-v2/module.py | 177 ++++++++++++++++++ .../albert-chinese-base/README.md | 173 +++++++++++++++++ .../albert-chinese-base/__init__.py | 0 .../albert-chinese-base/module.py | 175 +++++++++++++++++ .../albert-chinese-large/README.md | 173 +++++++++++++++++ .../albert-chinese-large/__init__.py | 0 .../albert-chinese-large/module.py | 175 +++++++++++++++++ .../albert-chinese-small/README.md | 173 +++++++++++++++++ .../albert-chinese-small/__init__.py | 0 .../albert-chinese-small/module.py | 175 +++++++++++++++++ .../albert-chinese-tiny/README.md | 173 +++++++++++++++++ .../albert-chinese-tiny/__init__.py | 0 .../albert-chinese-tiny/module.py | 175 +++++++++++++++++ .../albert-chinese-xlarge/README.md | 173 +++++++++++++++++ .../albert-chinese-xlarge/__init__.py | 0 .../albert-chinese-xlarge/module.py | 175 +++++++++++++++++ .../albert-chinese-xxlarge/README.md | 173 +++++++++++++++++ .../albert-chinese-xxlarge/__init__.py | 0 .../albert-chinese-xxlarge/module.py | 175 +++++++++++++++++ .../albert-xxlarge-v1/README.md | 173 +++++++++++++++++ .../albert-xxlarge-v1/__init__.py | 0 .../albert-xxlarge-v1/module.py | 176 +++++++++++++++++ .../albert-xxlarge-v2/README.md | 173 +++++++++++++++++ .../albert-xxlarge-v2/__init__.py | 0 .../albert-xxlarge-v2/module.py | 176 +++++++++++++++++ 28 files changed, 3138 insertions(+), 2 deletions(-) create mode 100644 modules/text/language_model/albert-base-v2/README.md create mode 100644 modules/text/language_model/albert-base-v2/__init__.py 
create mode 100644 modules/text/language_model/albert-base-v2/module.py create mode 100644 modules/text/language_model/albert-chinese-base/README.md create mode 100644 modules/text/language_model/albert-chinese-base/__init__.py create mode 100644 modules/text/language_model/albert-chinese-base/module.py create mode 100644 modules/text/language_model/albert-chinese-large/README.md create mode 100644 modules/text/language_model/albert-chinese-large/__init__.py create mode 100644 modules/text/language_model/albert-chinese-large/module.py create mode 100644 modules/text/language_model/albert-chinese-small/README.md create mode 100644 modules/text/language_model/albert-chinese-small/__init__.py create mode 100644 modules/text/language_model/albert-chinese-small/module.py create mode 100644 modules/text/language_model/albert-chinese-tiny/README.md create mode 100644 modules/text/language_model/albert-chinese-tiny/__init__.py create mode 100644 modules/text/language_model/albert-chinese-tiny/module.py create mode 100644 modules/text/language_model/albert-chinese-xlarge/README.md create mode 100644 modules/text/language_model/albert-chinese-xlarge/__init__.py create mode 100644 modules/text/language_model/albert-chinese-xlarge/module.py create mode 100644 modules/text/language_model/albert-chinese-xxlarge/README.md create mode 100644 modules/text/language_model/albert-chinese-xxlarge/__init__.py create mode 100644 modules/text/language_model/albert-chinese-xxlarge/module.py create mode 100644 modules/text/language_model/albert-xxlarge-v1/README.md create mode 100644 modules/text/language_model/albert-xxlarge-v1/__init__.py create mode 100644 modules/text/language_model/albert-xxlarge-v1/module.py create mode 100644 modules/text/language_model/albert-xxlarge-v2/README.md create mode 100644 modules/text/language_model/albert-xxlarge-v2/__init__.py create mode 100644 modules/text/language_model/albert-xxlarge-v2/module.py diff --git a/modules/text/language_model/albert-base-v1/README.md b/modules/text/language_model/albert-base-v1/README.md index abef64ad5..b0f68e8b9 100644 --- a/modules/text/language_model/albert-base-v1/README.md +++ b/modules/text/language_model/albert-base-v1/README.md @@ -25,9 +25,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 2.0.0 + - paddlepaddle >= 2.2.0 - - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 diff --git a/modules/text/language_model/albert-base-v2/README.md b/modules/text/language_model/albert-base-v2/README.md new file mode 100644 index 000000000..d2ad12fcb --- /dev/null +++ b/modules/text/language_model/albert-base-v2/README.md @@ -0,0 +1,173 @@ +# albert-base-v2 +|模型名称|albert-base-v2| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-base-v2| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|90MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-base-v2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-base-v2', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-base-v2 + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... 
]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-base-v2" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-base-v2/__init__.py b/modules/text/language_model/albert-base-v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-base-v2/module.py b/modules/text/language_model/albert-base-v2/module.py new file mode 100644 index 000000000..b3b639ed2 --- /dev/null +++ b/modules/text/language_model/albert-base-v2/module.py @@ -0,0 +1,177 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-base-v2", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained(pretrained_model_name_or_path='albert-base-v2', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained(pretrained_model_name_or_path='albert-base-v2', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + 
suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-base-v2', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-base-v2', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, 
title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. + """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-base-v2', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-base/README.md b/modules/text/language_model/albert-chinese-base/README.md new file mode 100644 index 000000000..a3964f448 --- /dev/null +++ b/modules/text/language_model/albert-chinese-base/README.md @@ -0,0 +1,173 @@ +# albert-chinese-base +|模型名称|albert-chinese-base| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-base| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|77MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-base + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-base', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, 
token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-base + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-base" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-base/__init__.py b/modules/text/language_model/albert-chinese-base/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-base/module.py b/modules/text/language_model/albert-chinese-base/module.py new file mode 100644 index 000000000..0a7796a01 --- /dev/null +++ b/modules/text/language_model/albert-chinese-base/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
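A minimal local sketch that mirrors the `get_embedding` call documented in the README above, assuming `albert-chinese-base` has already been installed via `hub install albert-chinese-base` and that inference runs on CPU:

```python
import paddlehub as hub

# Load the module without a fine-tuning task so that raw features are returned.
module = hub.Module(name='albert-chinese-base')

# Same payload as the serving example, passed to the module directly.
text = [["今天是个好日子"], ["天气预报说今天要下雨"]]
results = module.get_embedding(data=text, use_gpu=False)

# Each entry pairs the sentence-level (pooled) and token-level (sequence) features.
for sample, (pooled_feature, seq_feature) in zip(text, results):
    print(sample, len(pooled_feature), len(seq_feature))
```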
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-base", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-base', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-base', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-base', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-base', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, 
title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-base', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-large/README.md b/modules/text/language_model/albert-chinese-large/README.md new file mode 100644 index 000000000..48fec21d1 --- /dev/null +++ b/modules/text/language_model/albert-chinese-large/README.md @@ -0,0 +1,173 @@ +# albert-chinese-large +|模型名称|albert-chinese-large| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-large| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|112MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-large + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-large', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, 
sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-large + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-large" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-large/__init__.py b/modules/text/language_model/albert-chinese-large/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-large/module.py b/modules/text/language_model/albert-chinese-large/module.py new file mode 100644 index 000000000..f7aa985d1 --- /dev/null +++ b/modules/text/language_model/albert-chinese-large/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
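The `text-matching` branch in these module diffs scores a query/title pair by masked mean pooling of the token embeddings, followed by a classifier over the concatenation of the two pooled vectors and their absolute difference. A self-contained sketch of the pooling step with dummy tensors, assuming only `paddle` is installed:

```python
import paddle

# Dummy token embeddings: batch of 2, sequence length 4, hidden size 8.
token_embedding = paddle.randn([2, 4, 8])
# Dummy input ids; id 0 plays the role of the padding token here.
input_ids = paddle.to_tensor([[5, 6, 7, 0], [3, 9, 0, 0]])

# Zero out padding positions, then average the remaining token embeddings,
# exactly as the forward pass does for query and title.
mask = paddle.unsqueeze((input_ids != 0).astype(token_embedding.dtype), axis=2)
masked = token_embedding * mask
mean_embedding = paddle.sum(masked, axis=1) / paddle.sum(mask, axis=1)

print(mean_embedding.shape)  # [2, 8]
```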
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-large", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-large', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-large', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-large', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-large', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, 
title_position_ids, title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-large', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-small/README.md b/modules/text/language_model/albert-chinese-small/README.md new file mode 100644 index 000000000..8a4440ee5 --- /dev/null +++ b/modules/text/language_model/albert-chinese-small/README.md @@ -0,0 +1,173 @@ +# albert-chinese-small +|模型名称|albert-chinese-small| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-small| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|44MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-small + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-small', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, 
sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-small + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-small" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-small/__init__.py b/modules/text/language_model/albert-chinese-small/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-small/module.py b/modules/text/language_model/albert-chinese-small/module.py new file mode 100644 index 000000000..f7500c908 --- /dev/null +++ b/modules/text/language_model/albert-chinese-small/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
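The `token-cls` task documented above returns one predicted label per token. A hedged usage sketch; the checkpoint path and the BIO label map below are placeholders, not artifacts shipped with the module:

```python
import paddlehub as hub

# Hypothetical label map for a small BIO tagging task; adapt to the fine-tuning labels.
label_map = {0: 'B-PER', 1: 'I-PER', 2: 'O'}

model = hub.Module(
    name='albert-chinese-small',
    task='token-cls',
    load_checkpoint='/path/to/token_cls_parameters',  # placeholder path
    label_map=label_map)

data = [['广州今天出太阳了']]
results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False)
print(results[0])  # one label per token in the input sentence
```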
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-small", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-small', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-small', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-small', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-small', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, 
title_position_ids, title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-small', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-tiny/README.md b/modules/text/language_model/albert-chinese-tiny/README.md new file mode 100644 index 000000000..08c8f9f8c --- /dev/null +++ b/modules/text/language_model/albert-chinese-tiny/README.md @@ -0,0 +1,173 @@ +# albert-chinese-tiny +|模型名称|albert-chinese-tiny| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-tiny| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|40MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-tiny + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-tiny', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, 
sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-tiny + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-tiny" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-tiny/__init__.py b/modules/text/language_model/albert-chinese-tiny/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-tiny/module.py b/modules/text/language_model/albert-chinese-tiny/module.py new file mode 100644 index 000000000..5b5b79ba2 --- /dev/null +++ b/modules/text/language_model/albert-chinese-tiny/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
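After pooling, the matching head in these modules builds the feature vector `[query, title, |query - title|]` and feeds it to a binary classifier. A dummy-tensor sketch of that projection step, assuming a hidden size of 8:

```python
import paddle
import paddle.nn.functional as F

hidden_size = 8
query_mean = paddle.randn([2, hidden_size])
title_mean = paddle.randn([2, hidden_size])

# Feature vector [query, title, |query - title|], as assembled in the forward pass.
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)

# Untrained stand-in for the module's binary classifier layer.
classifier = paddle.nn.Linear(hidden_size * 3, 2)
probs = F.softmax(classifier(projection), axis=-1)
print(probs.shape)  # [2, 2]
```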
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-tiny", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-tiny', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-tiny', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-tiny', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-tiny', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, 
title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-tiny', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-xlarge/README.md b/modules/text/language_model/albert-chinese-xlarge/README.md new file mode 100644 index 000000000..ee1c99456 --- /dev/null +++ b/modules/text/language_model/albert-chinese-xlarge/README.md @@ -0,0 +1,173 @@ +# albert-chinese-xlarge +|模型名称|albert-chinese-xlarge| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-xlarge| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|346MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-xlarge + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-xlarge', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], 
\[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-xlarge + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-xlarge" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-xlarge/__init__.py b/modules/text/language_model/albert-chinese-xlarge/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-xlarge/module.py b/modules/text/language_model/albert-chinese-xlarge/module.py new file mode 100644 index 000000000..5e76ee63f --- /dev/null +++ b/modules/text/language_model/albert-chinese-xlarge/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
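The module constructors in these diffs restore fine-tuned weights with `paddle.load` plus `set_state_dict` whenever `load_checkpoint` points at an existing file. A standalone sketch of that round trip, using a throwaway linear layer instead of the full ALBERT model:

```python
import os

import paddle

# Throwaway layer standing in for the module; ALBERT weights are restored the same way.
layer = paddle.nn.Linear(4, 2)
checkpoint = 'demo_checkpoint.pdparams'

paddle.save(layer.state_dict(), checkpoint)

if os.path.isfile(checkpoint):
    state_dict = paddle.load(checkpoint)
    layer.set_state_dict(state_dict)
    print('Loaded parameters from %s' % os.path.abspath(checkpoint))
```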
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-xlarge", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-xlarge', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-xlarge', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-xlarge', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-xlarge', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, 
title_position_ids, title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-xlarge', *args, **kwargs) diff --git a/modules/text/language_model/albert-chinese-xxlarge/README.md b/modules/text/language_model/albert-chinese-xxlarge/README.md new file mode 100644 index 000000000..8cd47b50e --- /dev/null +++ b/modules/text/language_model/albert-chinese-xxlarge/README.md @@ -0,0 +1,173 @@ +# albert-chinese-xxlarge +|模型名称|albert-chinese-xxlarge| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-chinese-xxlarge| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|1.3GB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-chinese-xxlarge + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-chinese-xxlarge', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], 
\[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-chinese-xxlarge + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-chinese-xxlarge" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-chinese-xxlarge/__init__.py b/modules/text/language_model/albert-chinese-xxlarge/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-chinese-xxlarge/module.py b/modules/text/language_model/albert-chinese-xxlarge/module.py new file mode 100644 index 000000000..2fcc3cbf5 --- /dev/null +++ b/modules/text/language_model/albert-chinese-xxlarge/module.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
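The README above documents `get_embedding` but only exercises it through the Serving API. Below is a minimal local sketch of the same interface, assuming the module has been installed with `hub install albert-chinese-xxlarge`; treat it as illustrative rather than authoritative, since the exact tensor types of the returned features are not specified here.

```python
import paddlehub as hub

# Illustrative sketch: load the module without a task so that it exposes raw
# ALBERT features, then fetch sentence- and token-level embeddings locally.
model = hub.Module(name='albert-chinese-xxlarge')

data = [["今天是个好日子"], ["天气预报说今天要下雨"]]
results = model.get_embedding(data, use_gpu=False)

# Per the README, each element pairs a sentence-level pooled_feature with a
# token-level seq_feature for the corresponding input sample.
for text, (pooled_feature, seq_feature) in zip(data, results):
    print(text[0], type(pooled_feature), type(seq_feature))
```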
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-chinese-xxlarge", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-xxlarge', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained( + pretrained_model_name_or_path='albert-chinese-xxlarge', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-xxlarge', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-chinese-xxlarge', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, 
title_position_ids, title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-chinese-xxlarge', *args, **kwargs) diff --git a/modules/text/language_model/albert-xxlarge-v1/README.md b/modules/text/language_model/albert-xxlarge-v1/README.md new file mode 100644 index 000000000..1b001f48d --- /dev/null +++ b/modules/text/language_model/albert-xxlarge-v1/README.md @@ -0,0 +1,173 @@ +# albert-xxlarge-v1 +|模型名称|albert-xxlarge-v1| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-xxlarge-v1| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|1.3GB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-xxlarge-v1 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-xxlarge-v1', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, 
sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-xxlarge-v1 + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-xxlarge-v1" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-xxlarge-v1/__init__.py b/modules/text/language_model/albert-xxlarge-v1/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-xxlarge-v1/module.py b/modules/text/language_model/albert-xxlarge-v1/module.py new file mode 100644 index 000000000..a99c06ee2 --- /dev/null +++ b/modules/text/language_model/albert-xxlarge-v1/module.py @@ -0,0 +1,176 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
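The README above lists `token-cls` as a supported task but only demonstrates `seq-cls`. A hypothetical sequence-labeling sketch follows; the label set and checkpoint path are placeholders and do not ship with the module.

```python
import paddlehub as hub

# Hypothetical token-cls (sequence labeling) usage; labels and checkpoint are placeholders.
label_list = ['B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'O']
label_map = {idx: label for idx, label in enumerate(label_list)}

model = hub.Module(
    name='albert-xxlarge-v1',
    task='token-cls',
    load_checkpoint='/path/to/token_cls_parameters',  # saved by the PaddleHub Fine-tune APIs
    label_map=label_map)

# One text per sample; per the README, predict() returns one label per token per sample.
data = [['Barack Obama was born in Hawaii.']]
results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False)
for text, token_labels in zip(data, results):
    print(text[0], token_labels)
```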
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-xxlarge-v1", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-xxlarge-v1', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v1', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v1', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v1', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, 
title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v1', *args, **kwargs) diff --git a/modules/text/language_model/albert-xxlarge-v2/README.md b/modules/text/language_model/albert-xxlarge-v2/README.md new file mode 100644 index 000000000..af477225b --- /dev/null +++ b/modules/text/language_model/albert-xxlarge-v2/README.md @@ -0,0 +1,173 @@ +# albert-xxlarge-v2 +|模型名称|albert-xxlarge-v2| +| :--- | :---: | +|类别|文本-语义模型| +|网络|albert-xxlarge-v2| +|数据集|-| +|是否支持Fine-tuning|是| +|模型大小|1.3GB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - ALBERT针对当前预训练模型参数量过大的问题,提出了以下改进方案: + + - 嵌入向量参数化的因式分解。ALBERT对词嵌入参数进行了因式分解,先将单词映射到一个低维的词嵌入空间E,然后再将其映射到高维的隐藏空间H。 + + - 跨层参数共享。ALBERT共享了层之间的全部参数。 + +更多详情请参考[ALBERT论文](https://arxiv.org/abs/1909.11942) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install albert-xxlarge-v2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + +```python +import paddlehub as hub + +data = [ + ['这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般'], + ['怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片'], + ['作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。'], +] +label_map = {0: 'negative', 1: 'positive'} + +model = hub.Module( + name='albert-xxlarge-v2', + version='1.0.0', + task='seq-cls', + load_checkpoint='/path/to/parameters', + label_map=label_map) +results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False) +for idx, text in enumerate(data): + print('Data: {} \t Label: {}'.format(text, results[idx])) +``` + +详情可参考PaddleHub示例: +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `task`: 任务名称,可为`seq-cls`(文本分类任务)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` + + - **参数** + + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, 
sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m albert-xxlarge-v2 + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/albert-xxlarge-v2" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/text/language_model/albert-xxlarge-v2/__init__.py b/modules/text/language_model/albert-xxlarge-v2/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/text/language_model/albert-xxlarge-v2/module.py b/modules/text/language_model/albert-xxlarge-v2/module.py new file mode 100644 index 000000000..091d1842c --- /dev/null +++ b/modules/text/language_model/albert-xxlarge-v2/module.py @@ -0,0 +1,176 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
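Besides `seq-cls` and `token-cls`, the `module.py` below also wires up a `text-matching` head: token embeddings are mean-pooled under the padding mask, and the pair is classified from `[query, title, |query - title|]`. The following is a standalone sketch of that pooling-and-compare step on dummy tensors (not real ALBERT outputs).

```python
import paddle
import paddle.nn.functional as F

# Dummy stand-ins for ALBERT token embeddings and a 0/1 padding mask.
batch, seq_len, hidden = 2, 8, 16
token_emb = paddle.randn([batch, seq_len, hidden])
pad_mask = paddle.to_tensor([[1.] * 6 + [0.] * 2,   # sample 0: 6 real tokens, 2 pads
                             [1.] * 8])             # sample 1: no padding
pad_mask = paddle.unsqueeze(pad_mask, axis=2)       # [batch, seq_len, 1]

# Masked mean pooling: zero out padded positions, then average over real tokens only.
mean_emb = paddle.sum(token_emb * pad_mask, axis=1) / paddle.sum(pad_mask, axis=1)

# Treat the two pooled vectors as a (query, title) pair and score it from the
# concatenation [query, title, |query - title|], as the matching head does.
query_mean, title_mean = mean_emb[0:1], mean_emb[1:2]
features = paddle.concat([query_mean, title_mean, paddle.abs(query_mean - title_mean)], axis=-1)
classifier = paddle.nn.Linear(hidden * 3, 2)        # freshly initialized, untrained
probs = F.softmax(classifier(features), axis=-1)
print(probs.shape)  # [1, 2]
```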
+import math +import os +from typing import Dict + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.albert.modeling import AlbertForSequenceClassification +from paddlenlp.transformers.albert.modeling import AlbertForTokenClassification +from paddlenlp.transformers.albert.modeling import AlbertModel +from paddlenlp.transformers.albert.tokenizer import AlbertTokenizer + +from paddlehub.module.module import moduleinfo +from paddlehub.module.nlp_module import TransformerModule +from paddlehub.utils.log import logger + + +@moduleinfo(name="albert-xxlarge-v2", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/semantic_model", + meta=TransformerModule) +class Albert(nn.Layer): + """ + ALBERT model + """ + + def __init__( + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, + ): + super(Albert, self).__init__() + if label_map: + self.label_map = label_map + self.num_classes = len(label_map) + else: + self.num_classes = num_classes + + if task == 'sequence_classification': + task = 'seq-cls' + logger.warning( + "current task name 'sequence_classification' was renamed to 'seq-cls', " + "'sequence_classification' has been deprecated and will be removed in the future.", ) + if task == 'seq-cls': + self.model = AlbertForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='albert-xxlarge-v2', num_classes=self.num_classes, **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task == 'token-cls': + self.model = AlbertForTokenClassification.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v2', + num_classes=self.num_classes, + **kwargs) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) + elif task == 'text-matching': + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v2', **kwargs) + self.dropout = paddle.nn.Dropout(0.1) + self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) + self.criterion = paddle.nn.loss.CrossEntropyLoss() + self.metric = paddle.metric.Accuracy() + elif task is None: + self.model = AlbertModel.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v2', **kwargs) + else: + raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) + + self.task = task + + if load_checkpoint is not None and os.path.isfile(load_checkpoint): + state_dict = paddle.load(load_checkpoint) + self.set_state_dict(state_dict) + logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) + + def forward(self, + input_ids=None, + token_type_ids=None, + position_ids=None, + attention_mask=None, + query_input_ids=None, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_input_ids=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None, + seq_lengths=None, + labels=None): + + if self.task != 'text-matching': + result = self.model(input_ids, token_type_ids, position_ids, attention_mask) + else: + query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask) + title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, 
title_attention_mask) + + if self.task == 'seq-cls': + logits = result + probs = F.softmax(logits, axis=1) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + elif self.task == 'token-cls': + logits = result + token_level_probs = F.softmax(logits, axis=-1) + preds = token_level_probs.argmax(axis=-1) + if labels is not None: + loss = self.criterion(logits, labels.unsqueeze(-1)) + num_infer_chunks, num_label_chunks, num_correct_chunks = \ + self.metric.compute(None, seq_lengths, preds, labels) + self.metric.update(num_infer_chunks.numpy(), num_label_chunks.numpy(), num_correct_chunks.numpy()) + _, _, f1_score = map(float, self.metric.accumulate()) + return token_level_probs, loss, {'f1_score': f1_score} + return token_level_probs + elif self.task == 'text-matching': + query_token_embedding, _ = query_result + query_token_embedding = self.dropout(query_token_embedding) + query_attention_mask = paddle.unsqueeze( + (query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + query_token_embedding = query_token_embedding * query_attention_mask + query_sum_embedding = paddle.sum(query_token_embedding, axis=1) + query_sum_mask = paddle.sum(query_attention_mask, axis=1) + query_mean = query_sum_embedding / query_sum_mask + + title_token_embedding, _ = title_result + title_token_embedding = self.dropout(title_token_embedding) + title_attention_mask = paddle.unsqueeze( + (title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2) + title_token_embedding = title_token_embedding * title_attention_mask + title_sum_embedding = paddle.sum(title_token_embedding, axis=1) + title_sum_mask = paddle.sum(title_attention_mask, axis=1) + title_mean = title_sum_embedding / title_sum_mask + + sub = paddle.abs(paddle.subtract(query_mean, title_mean)) + projection = paddle.concat([query_mean, title_mean, sub], axis=-1) + logits = self.classifier(projection) + probs = F.softmax(logits) + if labels is not None: + loss = self.criterion(logits, labels) + correct = self.metric.compute(probs, labels) + acc = self.metric.update(correct) + return probs, loss, {'acc': acc} + return probs + else: + sequence_output, pooled_output = result + return sequence_output, pooled_output + + @staticmethod + def get_tokenizer(*args, **kwargs): + """ + Gets the tokenizer that is customized for this module. 
+ """ + return AlbertTokenizer.from_pretrained(pretrained_model_name_or_path='albert-xxlarge-v2', *args, **kwargs) From 33c9d1a8e4d7fd0a023819e27ba6c3819cda6b4b Mon Sep 17 00:00:00 2001 From: chenjian Date: Sat, 2 Apr 2022 09:15:47 +0000 Subject: [PATCH 003/117] add clas modules --- .../esnet_x0_25_imagenet/README.md | 133 +++++ .../esnet_x0_25_imagenet/model.py | 506 ++++++++++++++++++ .../esnet_x0_25_imagenet/module.py | 154 ++++++ .../esnet_x0_25_imagenet/processor.py | 374 +++++++++++++ .../esnet_x0_25_imagenet/utils.py | 129 +++++ .../esnet_x0_5_imagenet/README.md | 133 +++++ .../esnet_x0_5_imagenet/model.py | 506 ++++++++++++++++++ .../esnet_x0_5_imagenet/module.py | 154 ++++++ .../esnet_x0_5_imagenet/processor.py | 374 +++++++++++++ .../esnet_x0_5_imagenet/utils.py | 129 +++++ .../levit_128_imagenet/README.md | 132 +++++ .../levit_128_imagenet/model.py | 450 ++++++++++++++++ .../levit_128_imagenet/module.py | 154 ++++++ .../levit_128_imagenet/processor.py | 374 +++++++++++++ .../levit_128_imagenet/utils.py | 129 +++++ .../levit_128s_imagenet/README.md | 132 +++++ .../levit_128s_imagenet/model.py | 450 ++++++++++++++++ .../levit_128s_imagenet/module.py | 154 ++++++ .../levit_128s_imagenet/processor.py | 374 +++++++++++++ .../levit_128s_imagenet/utils.py | 129 +++++ .../levit_192_imagenet/README.md | 132 +++++ .../levit_192_imagenet/model.py | 450 ++++++++++++++++ .../levit_192_imagenet/module.py | 154 ++++++ .../levit_192_imagenet/processor.py | 374 +++++++++++++ .../levit_192_imagenet/utils.py | 129 +++++ .../levit_256_imagenet/README.md | 132 +++++ .../levit_256_imagenet/model.py | 450 ++++++++++++++++ .../levit_256_imagenet/module.py | 154 ++++++ .../levit_256_imagenet/processor.py | 374 +++++++++++++ .../levit_256_imagenet/utils.py | 129 +++++ .../levit_384_imagenet/README.md | 132 +++++ .../levit_384_imagenet/model.py | 450 ++++++++++++++++ .../levit_384_imagenet/module.py | 154 ++++++ .../levit_384_imagenet/processor.py | 374 +++++++++++++ .../levit_384_imagenet/utils.py | 129 +++++ .../pplcnet_x0_25_imagenet/README.md | 132 +++++ .../pplcnet_x0_25_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x0_25_imagenet/module.py | 154 ++++++ .../pplcnet_x0_25_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x0_25_imagenet/utils.py | 129 +++++ .../pplcnet_x0_35_imagenet/README.md | 132 +++++ .../pplcnet_x0_35_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x0_35_imagenet/module.py | 154 ++++++ .../pplcnet_x0_35_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x0_35_imagenet/utils.py | 129 +++++ .../pplcnet_x0_5_imagenet/README.md | 132 +++++ .../pplcnet_x0_5_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x0_5_imagenet/module.py | 154 ++++++ .../pplcnet_x0_5_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x0_5_imagenet/utils.py | 129 +++++ .../pplcnet_x0_75_imagenet/README.md | 132 +++++ .../pplcnet_x0_75_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x0_75_imagenet/module.py | 154 ++++++ .../pplcnet_x0_75_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x0_75_imagenet/utils.py | 129 +++++ .../pplcnet_x1_0_imagenet/README.md | 132 +++++ .../pplcnet_x1_0_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x1_0_imagenet/module.py | 154 ++++++ .../pplcnet_x1_0_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x1_0_imagenet/utils.py | 129 +++++ .../pplcnet_x1_5_imagenet/README.md | 132 +++++ .../pplcnet_x1_5_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x1_5_imagenet/module.py | 154 ++++++ .../pplcnet_x1_5_imagenet/processor.py | 
374 +++++++++++++ .../pplcnet_x1_5_imagenet/utils.py | 129 +++++ .../pplcnet_x2_0_imagenet/README.md | 132 +++++ .../pplcnet_x2_0_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x2_0_imagenet/module.py | 154 ++++++ .../pplcnet_x2_0_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x2_0_imagenet/utils.py | 129 +++++ .../pplcnet_x2_5_imagenet/README.md | 132 +++++ .../pplcnet_x2_5_imagenet/model.py | 478 +++++++++++++++++ .../pplcnet_x2_5_imagenet/module.py | 154 ++++++ .../pplcnet_x2_5_imagenet/processor.py | 374 +++++++++++++ .../pplcnet_x2_5_imagenet/utils.py | 129 +++++ 75 files changed, 18923 insertions(+) create mode 100644 modules/image/classification/esnet_x0_25_imagenet/README.md create mode 100644 modules/image/classification/esnet_x0_25_imagenet/model.py create mode 100644 modules/image/classification/esnet_x0_25_imagenet/module.py create mode 100644 modules/image/classification/esnet_x0_25_imagenet/processor.py create mode 100644 modules/image/classification/esnet_x0_25_imagenet/utils.py create mode 100644 modules/image/classification/esnet_x0_5_imagenet/README.md create mode 100644 modules/image/classification/esnet_x0_5_imagenet/model.py create mode 100644 modules/image/classification/esnet_x0_5_imagenet/module.py create mode 100644 modules/image/classification/esnet_x0_5_imagenet/processor.py create mode 100644 modules/image/classification/esnet_x0_5_imagenet/utils.py create mode 100644 modules/image/classification/levit_128_imagenet/README.md create mode 100644 modules/image/classification/levit_128_imagenet/model.py create mode 100644 modules/image/classification/levit_128_imagenet/module.py create mode 100644 modules/image/classification/levit_128_imagenet/processor.py create mode 100644 modules/image/classification/levit_128_imagenet/utils.py create mode 100644 modules/image/classification/levit_128s_imagenet/README.md create mode 100644 modules/image/classification/levit_128s_imagenet/model.py create mode 100644 modules/image/classification/levit_128s_imagenet/module.py create mode 100644 modules/image/classification/levit_128s_imagenet/processor.py create mode 100644 modules/image/classification/levit_128s_imagenet/utils.py create mode 100644 modules/image/classification/levit_192_imagenet/README.md create mode 100644 modules/image/classification/levit_192_imagenet/model.py create mode 100644 modules/image/classification/levit_192_imagenet/module.py create mode 100644 modules/image/classification/levit_192_imagenet/processor.py create mode 100644 modules/image/classification/levit_192_imagenet/utils.py create mode 100644 modules/image/classification/levit_256_imagenet/README.md create mode 100644 modules/image/classification/levit_256_imagenet/model.py create mode 100644 modules/image/classification/levit_256_imagenet/module.py create mode 100644 modules/image/classification/levit_256_imagenet/processor.py create mode 100644 modules/image/classification/levit_256_imagenet/utils.py create mode 100644 modules/image/classification/levit_384_imagenet/README.md create mode 100644 modules/image/classification/levit_384_imagenet/model.py create mode 100644 modules/image/classification/levit_384_imagenet/module.py create mode 100644 modules/image/classification/levit_384_imagenet/processor.py create mode 100644 modules/image/classification/levit_384_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x0_25_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x0_25_imagenet/model.py create mode 100644 
modules/image/classification/pplcnet_x0_25_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x0_25_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x0_25_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x0_35_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x0_35_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x0_35_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x0_35_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x0_35_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x0_5_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x0_5_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x0_5_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x0_5_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x0_5_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x0_75_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x0_75_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x0_75_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x0_75_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x0_75_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x1_0_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x1_0_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x1_0_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x1_0_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x1_0_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x1_5_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x1_5_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x1_5_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x1_5_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x1_5_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x2_0_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x2_0_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x2_0_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x2_0_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x2_0_imagenet/utils.py create mode 100644 modules/image/classification/pplcnet_x2_5_imagenet/README.md create mode 100644 modules/image/classification/pplcnet_x2_5_imagenet/model.py create mode 100644 modules/image/classification/pplcnet_x2_5_imagenet/module.py create mode 100644 modules/image/classification/pplcnet_x2_5_imagenet/processor.py create mode 100644 modules/image/classification/pplcnet_x2_5_imagenet/utils.py diff --git a/modules/image/classification/esnet_x0_25_imagenet/README.md b/modules/image/classification/esnet_x0_25_imagenet/README.md new file mode 100644 index 000000000..a87a9ee53 --- /dev/null +++ b/modules/image/classification/esnet_x0_25_imagenet/README.md @@ -0,0 +1,133 @@ +# esnet_x0_25_imagenet + +|模型名称|esnet_x0_25_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ESNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|10 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 
ESNet(Enhanced ShuffleNet)是百度自研的一个轻量级网络,该网络在 ShuffleNetV2 的基础上融合了 MobileNetV3、GhostNet、PPLCNet 的优点,组合成了一个在 ARM 设备上速度更快、精度更高的网络,由于其出色的表现,所以在 PaddleDetection 推出的 [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.3/configs/picodet) 使用了该模型做 backbone,配合更强的目标检测算法,最终的指标一举刷新了目标检测模型在 ARM 设备上的 SOTA 指标。该模型为模型规模参数scale为x0.25下的ESNet模型。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install esnet_x0_25_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run esnet_x0_25_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="esnet_x0_25_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m esnet_x0_25_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/esnet_x0_25_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install esnet_x0_25_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/esnet_x0_25_imagenet/model.py b/modules/image/classification/esnet_x0_25_imagenet/model.py new file mode 100644 index 000000000..a2384403f --- /dev/null +++ b/modules/image/classification/esnet_x0_25_imagenet/model.py @@ -0,0 +1,506 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
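A slightly fuller variant of the classification example in the README above, exercising the documented `paths`, `batch_size`, `use_gpu` and `top_k` parameters; the image paths are placeholders.

```python
import paddlehub as hub

# Batch prediction sketch; replace the placeholder paths with real local images.
image_paths = ['/PATH/TO/IMAGE_1.jpg', '/PATH/TO/IMAGE_2.jpg']

classifier = hub.Module(name="esnet_x0_25_imagenet")
results = classifier.classification(
    paths=image_paths,
    batch_size=2,    # run both images in a single batch
    use_gpu=False,   # set CUDA_VISIBLE_DEVICES before enabling GPU
    top_k=5)         # keep the five highest-scoring classes per image

# Per the README, each result is a dict with 'class_ids', 'scores' and 'label_names'.
for path, res in zip(image_paths, results):
    print(path, res['label_names'], res['scores'])
```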
+import math +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import concat +from paddle import ParamAttr +from paddle import reshape +from paddle import split +from paddle import transpose +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn import MaxPool2D +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + +MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]} + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." + return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. 
+ + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. + """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. 
+ + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." + return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." 
+ return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +def channel_shuffle(x, groups): + batch_size, num_channels, height, width = x.shape[0:4] + channels_per_group = num_channels // groups + x = reshape(x=x, shape=[batch_size, groups, channels_per_group, height, width]) + x = transpose(x=x, perm=[0, 2, 1, 3, 4]) + x = reshape(x=x, shape=[batch_size, num_channels, height, width]) + return x + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, groups=1, if_act=True): + super().__init__() + self.conv = Conv2D(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(out_channels, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.if_act = if_act + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + x = self.hardswish(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class ESBlock1(TheseusLayer): + + def __init__(self, in_channels, out_channels): + super().__init__() + self.pw_1_1 = ConvBNLayer(in_channels=in_channels // 2, out_channels=out_channels // 2, kernel_size=1, stride=1) + self.dw_1 = ConvBNLayer(in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=1, + groups=out_channels // 2, + if_act=False) + self.se = SEModule(out_channels) + + self.pw_1_2 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels // 2, kernel_size=1, stride=1) + + def forward(self, x): + x1, x2 = split(x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1) + x2 = self.pw_1_1(x2) + x3 = self.dw_1(x2) + x3 = concat([x2, x3], axis=1) + x3 = self.se(x3) + x3 = self.pw_1_2(x3) + x = concat([x1, x3], axis=1) + return channel_shuffle(x, 2) + + +class ESBlock2(TheseusLayer): + + def __init__(self, in_channels, out_channels): + super().__init__() + + # branch1 + self.dw_1 = ConvBNLayer(in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=2, + groups=in_channels, + if_act=False) + self.pw_1 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels // 2, kernel_size=1, stride=1) + # branch2 + self.pw_2_1 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels // 2, kernel_size=1) + self.dw_2 = ConvBNLayer(in_channels=out_channels // 2, + out_channels=out_channels 
// 2, + kernel_size=3, + stride=2, + groups=out_channels // 2, + if_act=False) + self.se = SEModule(out_channels // 2) + self.pw_2_2 = ConvBNLayer(in_channels=out_channels // 2, out_channels=out_channels // 2, kernel_size=1) + self.concat_dw = ConvBNLayer(in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + groups=out_channels) + self.concat_pw = ConvBNLayer(in_channels=out_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x): + x1 = self.dw_1(x) + x1 = self.pw_1(x1) + x2 = self.pw_2_1(x) + x2 = self.dw_2(x2) + x2 = self.se(x2) + x2 = self.pw_2_2(x2) + x = concat([x1, x2], axis=1) + x = self.concat_dw(x) + x = self.concat_pw(x) + return x + + +class ESNet(TheseusLayer): + + def __init__(self, + stages_pattern, + class_num=1000, + scale=1.0, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_num = class_num + self.class_expand = class_expand + stage_repeats = [3, 7, 3] + stage_out_channels = [ + -1, 24, make_divisible(116 * scale), + make_divisible(232 * scale), + make_divisible(464 * scale), 1024 + ] + + self.conv1 = ConvBNLayer(in_channels=3, out_channels=stage_out_channels[1], kernel_size=3, stride=2) + self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + block_list = [] + for stage_id, num_repeat in enumerate(stage_repeats): + for i in range(num_repeat): + if i == 0: + block = ESBlock2(in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2]) + else: + block = ESBlock1(in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2]) + block_list.append(block) + self.blocks = nn.Sequential(*block_list) + + self.conv2 = ConvBNLayer(in_channels=stage_out_channels[-2], out_channels=stage_out_channels[-1], kernel_size=1) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=stage_out_channels[-1], + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + self.fc = Linear(self.class_expand, self.class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + x = self.max_pool(x) + x = self.blocks(x) + x = self.conv2(x) + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def ESNet_x0_25(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x0_25 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x0_25` model depends on args. + """ + model = ESNet(scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + return model diff --git a/modules/image/classification/esnet_x0_25_imagenet/module.py b/modules/image/classification/esnet_x0_25_imagenet/module.py new file mode 100644 index 000000000..2c2edaab7 --- /dev/null +++ b/modules/image/classification/esnet_x0_25_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import ESNet_x0_25 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="esnet_x0_25_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Esnet_x0_25_Imagenet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'ESNet_x0_25.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'ESNet_x0_25_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = ESNet_x0_25() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/esnet_x0_25_imagenet/processor.py b/modules/image/classification/esnet_x0_25_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/esnet_x0_25_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
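The module defined above exposes its predictor through the `classification` method, the `hub run` command line, and PaddleHub Serving. Below is a minimal usage sketch of the Python path only; the image path is a placeholder and calling `hub.Module` assumes `esnet_x0_25_imagenet` has already been installed with `hub install`.

```python
# Minimal sketch of calling the classification() method defined in module.py above.
# '/PATH/TO/IMAGE' is a placeholder; replace it with a real image file before running.
import cv2
import paddlehub as hub

classifier = hub.Module(name="esnet_x0_25_imagenet")
results = classifier.classification(
    images=[cv2.imread('/PATH/TO/IMAGE')],  # BGR ndarrays of shape [H, W, C]
    batch_size=1,
    use_gpu=False,
    top_k=5)
# Each result dict carries 'class_ids', 'scores' and 'label_names' from the Topk postprocessor.
print(results)
```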
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/esnet_x0_25_imagenet/utils.py b/modules/image/classification/esnet_x0_25_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/esnet_x0_25_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
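The preprocessing operators above (`ResizeImage`, `CropImage`, `NormalizeImage`, `ToCHWImage`) are instantiated from the yaml transform list by `create_operators` and applied in order inside `classification()`. The sketch below shows that flow in isolation; the resize/crop/normalize values are illustrative assumptions rather than the shipped `ESNet_x0_25.yaml` settings, the image path is a placeholder, and the import assumes the script sits next to `processor.py`.

```python
# Illustrative pipeline built from the operator classes defined in processor.py above.
# The transform values are assumptions, not necessarily the shipped yaml config.
import cv2
import paddle

from processor import Topk, create_operators

transforms = [
    {"ResizeImage": {"resize_short": 256}},
    {"CropImage": {"size": 224}},
    {"NormalizeImage": {"scale": 1.0 / 255.0,
                        "mean": [0.485, 0.456, 0.406],
                        "std": [0.229, 0.224, 0.225],
                        "order": ""}},  # '' keeps HWC layout; ToCHWImage transposes afterwards
    {"ToCHWImage": None},
]
ops = create_operators(transforms)

img = cv2.imread('/PATH/TO/IMAGE')[:, :, ::-1]  # BGR -> RGB, as done in classification()
for op in ops:
    img = op(img)

fake_logits = paddle.randn([1, 1000])  # stand-in for the model output
print(Topk(topk=5)(fake_logits)[0]["class_ids"])  # no label file, so label_names stays empty
```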
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/esnet_x0_5_imagenet/README.md b/modules/image/classification/esnet_x0_5_imagenet/README.md new file mode 100644 index 000000000..f620be394 --- /dev/null +++ b/modules/image/classification/esnet_x0_5_imagenet/README.md @@ -0,0 +1,133 @@ +# esnet_x0_5_imagenet + +|模型名称|esnet_x0_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ESNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|12 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ESNet(Enhanced ShuffleNet)是百度自研的一个轻量级网络,该网络在 ShuffleNetV2 的基础上融合了 MobileNetV3、GhostNet、PPLCNet 的优点,组合成了一个在 ARM 设备上速度更快、精度更高的网络,由于其出色的表现,所以在 PaddleDetection 推出的 [PP-PicoDet](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.3/configs/picodet) 
使用了该模型做 backbone,配合更强的目标检测算法,最终的指标一举刷新了目标检测模型在 ARM 设备上的 SOTA 指标。该模型为模型规模参数scale为x0.5下的ESNet模型。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install esnet_x0_5_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run esnet_x0_5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="esnet_x0_5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m esnet_x0_5_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/esnet_x0_5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install esnet_x0_5_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/esnet_x0_5_imagenet/model.py b/modules/image/classification/esnet_x0_5_imagenet/model.py new file mode 100644 index 000000000..4e6bd8c7b --- /dev/null +++ b/modules/image/classification/esnet_x0_5_imagenet/model.py @@ -0,0 +1,506 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
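`module.py` loads its inference settings through `get_config` from `utils.py`, and `override_config` rewrites nested keys with dotted `key0.key1.idx.key2=value` strings, where integer segments index into lists. A small sketch follows; it assumes the script runs next to `utils.py`, that `ESNet_x0_5.yaml` is present, and that index 1 of its `Infer.transforms` list is a `ResizeImage` entry (an assumption patterned on the docstring example in `override_config`).

```python
# Hypothetical override usage for the helpers defined in utils.py; the yaml file
# name comes from module.py, but the override key itself is an illustrative assumption.
from utils import get_config

config = get_config(
    'ESNet_x0_5.yaml',
    overrides=['Infer.transforms.1.ResizeImage.resize_short=300'],  # list index 1, then nested dict keys
    show=False)
print(config['Infer']['transforms'][1])
```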
+import math +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import concat +from paddle import ParamAttr +from paddle import reshape +from paddle import split +from paddle import transpose +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn import MaxPool2D +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + +MODEL_STAGES_PATTERN = {"ESNet": ["blocks[2]", "blocks[9]", "blocks[12]"]} + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." + return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. 
+ + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. + """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. 
+ + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." + return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." 
+ return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +def channel_shuffle(x, groups): + batch_size, num_channels, height, width = x.shape[0:4] + channels_per_group = num_channels // groups + x = reshape(x=x, shape=[batch_size, groups, channels_per_group, height, width]) + x = transpose(x=x, perm=[0, 2, 1, 3, 4]) + x = reshape(x=x, shape=[batch_size, num_channels, height, width]) + return x + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, groups=1, if_act=True): + super().__init__() + self.conv = Conv2D(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(out_channels, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.if_act = if_act + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + x = self.hardswish(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class ESBlock1(TheseusLayer): + + def __init__(self, in_channels, out_channels): + super().__init__() + self.pw_1_1 = ConvBNLayer(in_channels=in_channels // 2, out_channels=out_channels // 2, kernel_size=1, stride=1) + self.dw_1 = ConvBNLayer(in_channels=out_channels // 2, + out_channels=out_channels // 2, + kernel_size=3, + stride=1, + groups=out_channels // 2, + if_act=False) + self.se = SEModule(out_channels) + + self.pw_1_2 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels // 2, kernel_size=1, stride=1) + + def forward(self, x): + x1, x2 = split(x, num_or_sections=[x.shape[1] // 2, x.shape[1] // 2], axis=1) + x2 = self.pw_1_1(x2) + x3 = self.dw_1(x2) + x3 = concat([x2, x3], axis=1) + x3 = self.se(x3) + x3 = self.pw_1_2(x3) + x = concat([x1, x3], axis=1) + return channel_shuffle(x, 2) + + +class ESBlock2(TheseusLayer): + + def __init__(self, in_channels, out_channels): + super().__init__() + + # branch1 + self.dw_1 = ConvBNLayer(in_channels=in_channels, + out_channels=in_channels, + kernel_size=3, + stride=2, + groups=in_channels, + if_act=False) + self.pw_1 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels // 2, kernel_size=1, stride=1) + # branch2 + self.pw_2_1 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels // 2, kernel_size=1) + self.dw_2 = ConvBNLayer(in_channels=out_channels // 2, + out_channels=out_channels 
// 2, + kernel_size=3, + stride=2, + groups=out_channels // 2, + if_act=False) + self.se = SEModule(out_channels // 2) + self.pw_2_2 = ConvBNLayer(in_channels=out_channels // 2, out_channels=out_channels // 2, kernel_size=1) + self.concat_dw = ConvBNLayer(in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + groups=out_channels) + self.concat_pw = ConvBNLayer(in_channels=out_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x): + x1 = self.dw_1(x) + x1 = self.pw_1(x1) + x2 = self.pw_2_1(x) + x2 = self.dw_2(x2) + x2 = self.se(x2) + x2 = self.pw_2_2(x2) + x = concat([x1, x2], axis=1) + x = self.concat_dw(x) + x = self.concat_pw(x) + return x + + +class ESNet(TheseusLayer): + + def __init__(self, + stages_pattern, + class_num=1000, + scale=1.0, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_num = class_num + self.class_expand = class_expand + stage_repeats = [3, 7, 3] + stage_out_channels = [ + -1, 24, make_divisible(116 * scale), + make_divisible(232 * scale), + make_divisible(464 * scale), 1024 + ] + + self.conv1 = ConvBNLayer(in_channels=3, out_channels=stage_out_channels[1], kernel_size=3, stride=2) + self.max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1) + + block_list = [] + for stage_id, num_repeat in enumerate(stage_repeats): + for i in range(num_repeat): + if i == 0: + block = ESBlock2(in_channels=stage_out_channels[stage_id + 1], + out_channels=stage_out_channels[stage_id + 2]) + else: + block = ESBlock1(in_channels=stage_out_channels[stage_id + 2], + out_channels=stage_out_channels[stage_id + 2]) + block_list.append(block) + self.blocks = nn.Sequential(*block_list) + + self.conv2 = ConvBNLayer(in_channels=stage_out_channels[-2], out_channels=stage_out_channels[-1], kernel_size=1) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=stage_out_channels[-1], + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + self.fc = Linear(self.class_expand, self.class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + x = self.max_pool(x) + x = self.blocks(x) + x = self.conv2(x) + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def ESNet_x0_5(pretrained=False, use_ssld=False, **kwargs): + """ + ESNet_x0_5 + Args: + pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. + If str, means the path of the pretrained model. + use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. + Returns: + model: nn.Layer. Specific `ESNet_x0_5` model depends on args. + """ + model = ESNet(scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["ESNet"], **kwargs) + return model diff --git a/modules/image/classification/esnet_x0_5_imagenet/module.py b/modules/image/classification/esnet_x0_5_imagenet/module.py new file mode 100644 index 000000000..0abb6c0f5 --- /dev/null +++ b/modules/image/classification/esnet_x0_5_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import ESNet_x0_5 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="esnet_x0_5_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class Esnet_x0_5_Imagenet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'ESNet_x0_5.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'ESNet_x0_5_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = ESNet_x0_5() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/esnet_x0_5_imagenet/processor.py b/modules/image/classification/esnet_x0_5_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/esnet_x0_5_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
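`ESNet.__init__` in `model.py` derives its stage widths from `make_divisible`, which rounds each scaled channel count to a multiple of 8 and bumps it up by one step if plain rounding would lose more than 10% of the value. The snippet below is a quick arithmetic check of the widths this produces for the two scales added in this patch, with `make_divisible` copied verbatim from the model file.

```python
# Reproduces the stage_out_channels arithmetic from ESNet.__init__ for the two
# scales added here; make_divisible is copied from model.py above.
def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


for scale in (0.25, 0.5):
    stage_out_channels = [
        -1, 24,
        make_divisible(116 * scale),
        make_divisible(232 * scale),
        make_divisible(464 * scale),
        1024,
    ]
    print(scale, stage_out_channels)
# 0.25 -> [-1, 24, 32, 56, 120, 1024]
# 0.5  -> [-1, 24, 56, 120, 232, 1024]
```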
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/esnet_x0_5_imagenet/utils.py b/modules/image/classification/esnet_x0_5_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/esnet_x0_5_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
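For orientation, a minimal usage sketch of the preprocessing operators and the Topk post-processor defined in processor.py above. The transform values below are assumptions for illustration only (the real list is read from the module's YAML config), and the random logits tensor stands in for actual model output.

```python
# Hypothetical sketch; run inside processor.py's namespace, or first do
# `from processor import create_operators, Topk`.
import cv2
import paddle

ops = create_operators([
    {'ResizeImage': {'resize_short': 256}},            # assumed values, not the shipped config
    {'CropImage': {'size': 224}},
    {'NormalizeImage': {'scale': 1.0 / 255.0, 'order': ''}},
    {'ToCHWImage': None},
])
img = cv2.imread('/PATH/TO/IMAGE')[:, :, ::-1]          # BGR -> RGB, as module.py does
for op in ops:
    img = op(img)

logits = paddle.rand([1, 1000])                         # stand-in for the classifier output
print(Topk(topk=5)(logits))                             # [{'class_ids': [...], 'scores': [...], ...}]
```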
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/levit_128_imagenet/README.md b/modules/image/classification/levit_128_imagenet/README.md new file mode 100644 index 000000000..5a1bbedfc --- /dev/null +++ b/modules/image/classification/levit_128_imagenet/README.md @@ -0,0 +1,132 @@ +# levit_128_imagenet + +|模型名称|levit_128_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|LeViT| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|54 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - LeViT 是一种快速推理的、用于图像分类任务的混合神经网络。其设计之初考虑了网络模型在不同的硬件平台上的性能,因此能够更好地反映普遍应用的真实场景。通过大量实验,作者找到了卷积神经网络与 Transformer 体系更好的结合方式,并且提出了 attention-based 方法,用于整合 Transformer 中的位置信息编码, 该模块的模型结构配置为LeViT128, 详情可参考[论文地址](https://arxiv.org/abs/2104.01136)。 + + +## 二、安装 + +- ### 1、环境依赖 
+ + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install levit_128_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run levit_128_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="levit_128_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m levit_128_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/levit_128_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install levit_128_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/levit_128_imagenet/model.py b/modules/image/classification/levit_128_imagenet/model.py new file mode 100644 index 000000000..2cf87d515 --- /dev/null +++ b/modules/image/classification/levit_128_imagenet/model.py @@ -0,0 +1,450 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
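The Attention and AttentionSubsample layers in the model code below encode position information as a learnable bias table indexed by relative spatial offsets. A toy illustration of that indexing, written independently of the patch with a 3x3 grid instead of the real feature-map resolution:

```python
# Toy illustration of the relative-offset indexing used by the Attention layers below.
import itertools

resolution = 3  # toy grid; the real layers use the feature-map resolution (e.g. 14)
points = list(itertools.product(range(resolution), range(resolution)))
attention_offsets, idxs = {}, []
for p1 in points:
    for p2 in points:
        offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
        if offset not in attention_offsets:      # one learnable bias per distinct offset
            attention_offsets[offset] = len(attention_offsets)
        idxs.append(attention_offsets[offset])

print(len(attention_offsets))  # 9 distinct (|dy|, |dx|) pairs for a 3x3 grid
print(len(idxs))               # 81 = N*N lookups into the shared bias table
```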
+# Code was based on https://github.com/facebookresearch/LeViT +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Constant +from paddle.nn.initializer import TruncatedNormal +from paddle.regularizer import L2Decay + +from .vision_transformer import Identity +from .vision_transformer import ones_ +from .vision_transformer import trunc_normal_ +from .vision_transformer import zeros_ + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_sublayer('c', nn.Conv2D(a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential(Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand(shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + + def __init__(self, dim, key_dim, num_heads=8, attn_ratio=4, activation=None, resolution=14): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in 
attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split(qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential(Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list(itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = {} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', 
paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape(self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul(q, paddle.transpose(k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape(paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, + do) in enumerate(zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + ), drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0), + ), drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample(*embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN(h, embed_dim[i + 1], bn_weight_init=0), + ), drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + 
return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def LeViT_128(**kwargs): + model = model_factory(**specification['LeViT_128'], class_num=1000, distillation=False) + return model diff --git a/modules/image/classification/levit_128_imagenet/module.py b/modules/image/classification/levit_128_imagenet/module.py new file mode 100644 index 000000000..1ed4aba85 --- /dev/null +++ b/modules/image/classification/levit_128_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
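The specification table above encodes per-stage widths, heads and depths as underscore-separated strings, and model_factory simply splits them. A small sketch of that expansion for the 'LeViT_128' entry:

```python
# Sketch of how model_factory above expands the 'LeViT_128' specification.
spec = {'C': '128_256_384', 'D': 16, 'N': '4_8_12', 'X': '4_4_4', 'drop_path': 0}
embed_dim = [int(x) for x in spec['C'].split('_')]   # [128, 256, 384] stage widths
num_heads = [int(x) for x in spec['N'].split('_')]   # [4, 8, 12] attention heads per stage
depth     = [int(x) for x in spec['X'].split('_')]   # [4, 4, 4] blocks per stage
key_dim   = [spec['D']] * 3                          # [16, 16, 16] per-head key dimension
print(embed_dim, num_heads, depth, key_dim)
```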
+import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LeViT_128 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="levit_128_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class LeViT_128_ImageNet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'LeViT_128.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'LeViT_128_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = LeViT_128() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/levit_128_imagenet/processor.py b/modules/image/classification/levit_128_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/levit_128_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/levit_128_imagenet/utils.py b/modules/image/classification/levit_128_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/levit_128_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
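The configuration helpers in the utils.py that follows accept dotted-key overrides such as 'Infer.PostProcess.topk=5'. A small sketch of that mechanism on an in-memory dict (the keys and values here are made up; run it after importing the helpers):

```python
# Sketch of the dotted-key override handled by override_config() below.
cfg = {'Infer': {'PostProcess': {'topk': 1}}}
override_config(cfg, ['Infer.PostProcess.topk=5'])
print(cfg['Infer']['PostProcess']['topk'])  # 5 (the string '5' is evaluated to an int)
```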
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/levit_128s_imagenet/README.md b/modules/image/classification/levit_128s_imagenet/README.md new file mode 100644 index 000000000..34a1b82fa --- /dev/null +++ b/modules/image/classification/levit_128s_imagenet/README.md @@ -0,0 +1,132 @@ +# levit_128s_imagenet + +|模型名称|levit_128s_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|LeViT| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|45 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - LeViT 是一种快速推理的、用于图像分类任务的混合神经网络。其设计之初考虑了网络模型在不同的硬件平台上的性能,因此能够更好地反映普遍应用的真实场景。通过大量实验,作者找到了卷积神经网络与 Transformer 体系更好的结合方式,并且提出了 attention-based 方法,用于整合 Transformer 中的位置信息编码, 该模块的模型结构配置为LeViT128s, 详情可参考[论文地址](https://arxiv.org/abs/2104.01136)。 + + +## 二、安装 + +- ### 
1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install levit_128s_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run levit_128s_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="levit_128s_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m levit_128s_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/levit_128s_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install levit_128s_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/levit_128s_imagenet/model.py b/modules/image/classification/levit_128s_imagenet/model.py new file mode 100644 index 000000000..7a1b84671 --- /dev/null +++ b/modules/image/classification/levit_128s_imagenet/model.py @@ -0,0 +1,450 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
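The LeViT variant in the model code below starts from a 224x224 input; the b16 convolutional stem (four stride-2 convolutions) brings it to a 14x14 token grid, and each AttentionSubsample stage halves the grid again. A quick check of that resolution schedule:

```python
# Resolution schedule implied by the LeViT code below (img_size=224, patch_size=16,
# two 'Subsample' stages with stride 2), using resolution_ = (resolution - 1) // stride + 1.
img_size, patch_size = 224, 16
resolution = img_size // patch_size      # 14 after the b16 stem
stages = [resolution]
for stride in (2, 2):                    # the two AttentionSubsample stages in down_ops
    resolution = (resolution - 1) // stride + 1
    stages.append(resolution)
print(stages)                            # [14, 7, 4]
```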
+# Code was based on https://github.com/facebookresearch/LeViT +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Constant +from paddle.nn.initializer import TruncatedNormal +from paddle.regularizer import L2Decay + +from .vision_transformer import Identity +from .vision_transformer import ones_ +from .vision_transformer import trunc_normal_ +from .vision_transformer import zeros_ + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_sublayer('c', nn.Conv2D(a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential(Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand(shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + + def __init__(self, dim, key_dim, num_heads=8, attn_ratio=4, activation=None, resolution=14): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in 
attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split(qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential(Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list(itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = {} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', 
paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape(self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul(q, paddle.transpose(k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape(paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, + do) in enumerate(zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + ), drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0), + ), drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample(*embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN(h, embed_dim[i + 1], bn_weight_init=0), + ), drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + 
return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def LeViT_128S(**kwargs): + model = model_factory(**specification['LeViT_128S'], class_num=1000, distillation=False) + return model diff --git a/modules/image/classification/levit_128s_imagenet/module.py b/modules/image/classification/levit_128s_imagenet/module.py new file mode 100644 index 000000000..9476fecfa --- /dev/null +++ b/modules/image/classification/levit_128s_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
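+# Usage sketch (not part of the upstream logic; the module name matches the @moduleinfo
+# declaration below and the image path is a placeholder):
+#
+#   import cv2
+#   import paddlehub as hub
+#
+#   classifier = hub.Module(name="levit_128s_imagenet")
+#   result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')], top_k=5)
+#   print(result[0]['label_names'])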
+import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LeViT_128S +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="levit_128s_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class LeViT_128S_ImageNet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'LeViT_128S.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'LeViT_128S_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = LeViT_128S() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/levit_128s_imagenet/processor.py b/modules/image/classification/levit_128s_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/levit_128s_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
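+        # Descriptive note: when the transform config omits these values, scale falls back
+        # to 1/255 and mean/std fall back to the ImageNet statistics set just below.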
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/levit_128s_imagenet/utils.py b/modules/image/classification/levit_128s_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/levit_128s_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
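+# Usage sketch (illustrative; the YAML name matches the file loaded by module.py, while
+# the override key is only an assumption about the config layout):
+#
+#   config = get_config('LeViT_128S.yaml',
+#                       overrides=['Infer.transforms.1.CropImage.size=224'])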
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/levit_192_imagenet/README.md b/modules/image/classification/levit_192_imagenet/README.md new file mode 100644 index 000000000..c3e86eea1 --- /dev/null +++ b/modules/image/classification/levit_192_imagenet/README.md @@ -0,0 +1,132 @@ +# levit_192_imagenet + +|模型名称|levit_192_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|LeViT| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|64 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - LeViT 是一种快速推理的、用于图像分类任务的混合神经网络。其设计之初考虑了网络模型在不同的硬件平台上的性能,因此能够更好地反映普遍应用的真实场景。通过大量实验,作者找到了卷积神经网络与 Transformer 体系更好的结合方式,并且提出了 attention-based 方法,用于整合 Transformer 中的位置信息编码, 该模块的模型结构配置为LeViT192, 详情可参考[论文地址](https://arxiv.org/abs/2104.01136)。 + + +## 二、安装 + +- ### 1、环境依赖 
+ + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install levit_192_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run levit_192_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="levit_192_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m levit_192_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/levit_192_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install levit_192_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/levit_192_imagenet/model.py b/modules/image/classification/levit_192_imagenet/model.py new file mode 100644 index 000000000..104d5f066 --- /dev/null +++ b/modules/image/classification/levit_192_imagenet/model.py @@ -0,0 +1,450 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
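+# Shape note (derived from the code in this file): attention_biases has shape
+# [num_heads, num_offsets] and attention_bias_idxs has shape [N_q, N_k], so
+# cal_attention_biases returns a [num_heads, N_q, N_k] bias that is added to the
+# scaled q @ k^T attention map before the softmax.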
+# Code was based on https://github.com/facebookresearch/LeViT +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Constant +from paddle.nn.initializer import TruncatedNormal +from paddle.regularizer import L2Decay + +from .vision_transformer import Identity +from .vision_transformer import ones_ +from .vision_transformer import trunc_normal_ +from .vision_transformer import zeros_ + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_sublayer('c', nn.Conv2D(a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential(Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand(shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + + def __init__(self, dim, key_dim, num_heads=8, attn_ratio=4, activation=None, resolution=14): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in 
attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split(qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential(Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list(itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = {} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', 
paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape(self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul(q, paddle.transpose(k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape(paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, + do) in enumerate(zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + ), drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0), + ), drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample(*embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN(h, embed_dim[i + 1], bn_weight_init=0), + ), drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + 
return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def LeViT_192(**kwargs): + model = model_factory(**specification['LeViT_192'], class_num=1000, distillation=False) + return model diff --git a/modules/image/classification/levit_192_imagenet/module.py b/modules/image/classification/levit_192_imagenet/module.py new file mode 100644 index 000000000..1e982e824 --- /dev/null +++ b/modules/image/classification/levit_192_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
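+# Usage sketch (illustrative; mirrors the README example for this module, with a
+# placeholder image path):
+#
+#   import paddlehub as hub
+#
+#   classifier = hub.Module(name="levit_192_imagenet")
+#   result = classifier.classification(paths=['/PATH/TO/IMAGE'], top_k=1)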
+import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LeViT_192 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="levit_192_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class LeViT_192_ImageNet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'LeViT_192.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'LeViT_192_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = LeViT_192() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/levit_192_imagenet/processor.py b/modules/image/classification/levit_192_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/levit_192_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/levit_192_imagenet/utils.py b/modules/image/classification/levit_192_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/levit_192_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
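+# Note (derived from the code below): parse_config wraps the YAML into nested AttrDict
+# objects, so values can be read either as config['Infer']['transforms'] or as
+# config.Infer.transforms.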
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/levit_256_imagenet/README.md b/modules/image/classification/levit_256_imagenet/README.md new file mode 100644 index 000000000..fefc5bebd --- /dev/null +++ b/modules/image/classification/levit_256_imagenet/README.md @@ -0,0 +1,132 @@ +# levit_256_imagenet + +|模型名称|levit_256_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|LeViT| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|109 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - LeViT 是一种快速推理的、用于图像分类任务的混合神经网络。其设计之初考虑了网络模型在不同的硬件平台上的性能,因此能够更好地反映普遍应用的真实场景。通过大量实验,作者找到了卷积神经网络与 Transformer 体系更好的结合方式,并且提出了 attention-based 方法,用于整合 Transformer 中的位置信息编码, 该模块的模型结构配置为LeViT256, 详情可参考[论文地址](https://arxiv.org/abs/2104.01136)。 + + +## 二、安装 + +- ### 
1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install levit_256_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run levit_256_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="levit_256_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m levit_256_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/levit_256_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install levit_256_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/levit_256_imagenet/model.py b/modules/image/classification/levit_256_imagenet/model.py new file mode 100644 index 000000000..66b5cd8d0 --- /dev/null +++ b/modules/image/classification/levit_256_imagenet/model.py @@ -0,0 +1,450 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
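The Attention blocks in the model code below keep one learnable bias per distinct relative offset between grid positions, plus an index buffer mapping every (query, key) pair to its offset. A standalone sketch of that bookkeeping on a tiny 2x2 grid:

    import itertools

    points = list(itertools.product(range(2), range(2)))   # 2x2 feature map
    attention_offsets, idxs = {}, []
    for p1 in points:
        for p2 in points:
            offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1]))
            idxs.append(attention_offsets.setdefault(offset, len(attention_offsets)))
    print(attention_offsets)   # {(0, 0): 0, (0, 1): 1, (1, 0): 2, (1, 1): 3}
    print(len(idxs))           # 16 entries, reshaped into a 4x4 attention_bias_idxs buffer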
+# Code was based on https://github.com/facebookresearch/LeViT +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Constant +from paddle.nn.initializer import TruncatedNormal +from paddle.regularizer import L2Decay + +from .vision_transformer import Identity +from .vision_transformer import ones_ +from .vision_transformer import trunc_normal_ +from .vision_transformer import zeros_ + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_sublayer('c', nn.Conv2D(a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential(Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand(shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + + def __init__(self, dim, key_dim, num_heads=8, attn_ratio=4, activation=None, resolution=14): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in 
attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split(qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential(Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list(itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = {} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', 
paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape(self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul(q, paddle.transpose(k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape(paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, + do) in enumerate(zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + ), drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0), + ), drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample(*embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN(h, embed_dim[i + 1], bn_weight_init=0), + ), drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + 
return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def LeViT_256(**kwargs): + model = model_factory(**specification['LeViT_256'], class_num=1000, distillation=False) + return model diff --git a/modules/image/classification/levit_256_imagenet/module.py b/modules/image/classification/levit_256_imagenet/module.py new file mode 100644 index 000000000..161cc02c0 --- /dev/null +++ b/modules/image/classification/levit_256_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
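The `specification` table in model.py above encodes each LeViT variant as underscore-separated strings, which `model_factory` splits into per-stage lists. A quick standalone sketch of that decoding for the LeViT_256 entry:

    spec = {'C': '256_384_512', 'D': 32, 'N': '4_6_8', 'X': '4_4_4', 'drop_path': 0}
    embed_dim = [int(x) for x in spec['C'].split('_')]   # [256, 384, 512]
    num_heads = [int(x) for x in spec['N'].split('_')]   # [4, 6, 8]
    depth     = [int(x) for x in spec['X'].split('_')]   # [4, 4, 4]
    key_dim   = [spec['D']] * 3                          # [32, 32, 32]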
+import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LeViT_256 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="levit_256_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class LeViT_256_ImageNet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'LeViT_256.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'LeViT_256_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = LeViT_256() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/levit_256_imagenet/processor.py b/modules/image/classification/levit_256_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/levit_256_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/levit_256_imagenet/utils.py b/modules/image/classification/levit_256_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/levit_256_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
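`override_config` (defined below) walks a nested config along a dotted key path, treating numeric segments as list indices, exactly as its docstring example `'VALID.transforms.1.ResizeImage.resize_short=300'` suggests. A small self-contained sketch with an illustrative config dict:

    from utils import override_config   # helper defined below

    cfg = {'Infer': {'PostProcess': {'topk': 1},
                     'transforms': [{'ResizeImage': {'resize_short': 256}}]}}
    override_config(cfg, ['Infer.PostProcess.topk=5',
                          'Infer.transforms.0.ResizeImage.resize_short=288'])
    print(cfg['Infer']['PostProcess']['topk'])                           # 5
    print(cfg['Infer']['transforms'][0]['ResizeImage']['resize_short'])  # 288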
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/levit_384_imagenet/README.md b/modules/image/classification/levit_384_imagenet/README.md new file mode 100644 index 000000000..45149034b --- /dev/null +++ b/modules/image/classification/levit_384_imagenet/README.md @@ -0,0 +1,132 @@ +# levit_384_imagenet + +|模型名称|levit_384_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|LeViT| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|225 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - LeViT 是一种快速推理的、用于图像分类任务的混合神经网络。其设计之初考虑了网络模型在不同的硬件平台上的性能,因此能够更好地反映普遍应用的真实场景。通过大量实验,作者找到了卷积神经网络与 Transformer 体系更好的结合方式,并且提出了 attention-based 方法,用于整合 Transformer 中的位置信息编码, 该模块的模型结构配置为LeViT384, 详情可参考[论文地址](https://arxiv.org/abs/2104.01136)。 + + +## 二、安装 + +- ### 
1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install levit_384_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run levit_384_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="levit_384_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m levit_384_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/levit_384_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install levit_384_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/levit_384_imagenet/model.py b/modules/image/classification/levit_384_imagenet/model.py new file mode 100644 index 000000000..c1b3bf68f --- /dev/null +++ b/modules/image/classification/levit_384_imagenet/model.py @@ -0,0 +1,450 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
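The hybrid stem built by `b16()` in the model code below halves the spatial resolution four times, which is why the transformer stages start from `resolution = img_size // patch_size = 14`. A stripped-down sketch of that shape arithmetic (BatchNorm and weight init omitted), using the first LeViT_384 embedding width of 384:

    import paddle
    import paddle.nn as nn

    n = 384                                    # first embedding dim of LeViT_384
    stem = nn.Sequential(
        nn.Conv2D(3, n // 8, 3, 2, 1), nn.Hardswish(),
        nn.Conv2D(n // 8, n // 4, 3, 2, 1), nn.Hardswish(),
        nn.Conv2D(n // 4, n // 2, 3, 2, 1), nn.Hardswish(),
        nn.Conv2D(n // 2, n, 3, 2, 1))
    out = stem(paddle.randn([1, 3, 224, 224]))
    print(out.shape)                           # [1, 384, 14, 14] -> 14 * 14 = 196 tokens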
+# Code was based on https://github.com/facebookresearch/LeViT +import itertools +import math +import warnings + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.initializer import Constant +from paddle.nn.initializer import TruncatedNormal +from paddle.regularizer import L2Decay + +from .vision_transformer import Identity +from .vision_transformer import ones_ +from .vision_transformer import trunc_normal_ +from .vision_transformer import zeros_ + + +def cal_attention_biases(attention_biases, attention_bias_idxs): + gather_list = [] + attention_bias_t = paddle.transpose(attention_biases, (1, 0)) + nums = attention_bias_idxs.shape[0] + for idx in range(nums): + gather = paddle.gather(attention_bias_t, attention_bias_idxs[idx]) + gather_list.append(gather) + shape0, shape1 = attention_bias_idxs.shape + gather = paddle.concat(gather_list) + return paddle.transpose(gather, (1, 0)).reshape((0, shape0, shape1)) + + +class Conv2d_BN(nn.Sequential): + + def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1, resolution=-10000): + super().__init__() + self.add_sublayer('c', nn.Conv2D(a, b, ks, stride, pad, dilation, groups, bias_attr=False)) + bn = nn.BatchNorm2D(b) + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + +class Linear_BN(nn.Sequential): + + def __init__(self, a, b, bn_weight_init=1): + super().__init__() + self.add_sublayer('c', nn.Linear(a, b, bias_attr=False)) + bn = nn.BatchNorm1D(b) + if bn_weight_init == 0: + zeros_(bn.weight) + else: + ones_(bn.weight) + zeros_(bn.bias) + self.add_sublayer('bn', bn) + + def forward(self, x): + l, bn = self._sub_layers.values() + x = l(x) + return paddle.reshape(bn(x.flatten(0, 1)), x.shape) + + +class BN_Linear(nn.Sequential): + + def __init__(self, a, b, bias=True, std=0.02): + super().__init__() + self.add_sublayer('bn', nn.BatchNorm1D(a)) + l = nn.Linear(a, b, bias_attr=bias) + trunc_normal_(l.weight) + if bias: + zeros_(l.bias) + self.add_sublayer('l', l) + + +def b16(n, activation, resolution=224): + return nn.Sequential(Conv2d_BN(3, n // 8, 3, 2, 1, resolution=resolution), activation(), + Conv2d_BN(n // 8, n // 4, 3, 2, 1, resolution=resolution // 2), activation(), + Conv2d_BN(n // 4, n // 2, 3, 2, 1, resolution=resolution // 4), activation(), + Conv2d_BN(n // 2, n, 3, 2, 1, resolution=resolution // 8)) + + +class Residual(nn.Layer): + + def __init__(self, m, drop): + super().__init__() + self.m = m + self.drop = drop + + def forward(self, x): + if self.training and self.drop > 0: + y = paddle.rand(shape=[x.shape[0], 1, 1]).__ge__(self.drop).astype("float32") + y = y.divide(paddle.full_like(y, 1 - self.drop)) + return paddle.add(x, y) + else: + return paddle.add(x, self.m(x)) + + +class Attention(nn.Layer): + + def __init__(self, dim, key_dim, num_heads=8, attn_ratio=4, activation=None, resolution=14): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * num_heads + self.attn_ratio = attn_ratio + self.h = self.dh + nh_kd * 2 + self.qkv = Linear_BN(dim, self.h) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, dim, bn_weight_init=0)) + points = list(itertools.product(range(resolution), range(resolution))) + N = len(points) + attention_offsets = {} + idxs = [] + for p1 in points: + for p2 in points: + offset = (abs(p1[0] - p2[0]), abs(p1[1] - p2[1])) + if offset not in 
attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + tensor_idxs = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', paddle.reshape(tensor_idxs, [N, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + qkv = self.qkv(x) + qkv = paddle.reshape(qkv, [B, N, self.num_heads, self.h // self.num_heads]) + q, k, v = paddle.split(qkv, [self.key_dim, self.key_dim, self.d], axis=3) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + k_transpose = paddle.transpose(k, perm=[0, 1, 3, 2]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + attn = (paddle.matmul(q, k_transpose) * self.scale + attention_biases) + attn = F.softmax(attn) + x = paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]) + x = paddle.reshape(x, [B, N, self.dh]) + x = self.proj(x) + return x + + +class Subsample(nn.Layer): + + def __init__(self, stride, resolution): + super().__init__() + self.stride = stride + self.resolution = resolution + + def forward(self, x): + B, N, C = x.shape + x = paddle.reshape(x, [B, self.resolution, self.resolution, C]) + end1, end2 = x.shape[1], x.shape[2] + x = x[:, 0:end1:self.stride, 0:end2:self.stride] + x = paddle.reshape(x, [B, -1, C]) + return x + + +class AttentionSubsample(nn.Layer): + + def __init__(self, + in_dim, + out_dim, + key_dim, + num_heads=8, + attn_ratio=2, + activation=None, + stride=2, + resolution=14, + resolution_=7): + super().__init__() + self.num_heads = num_heads + self.scale = key_dim**-0.5 + self.key_dim = key_dim + self.nh_kd = nh_kd = key_dim * num_heads + self.d = int(attn_ratio * key_dim) + self.dh = int(attn_ratio * key_dim) * self.num_heads + self.attn_ratio = attn_ratio + self.resolution_ = resolution_ + self.resolution_2 = resolution_**2 + self.training = True + h = self.dh + nh_kd + self.kv = Linear_BN(in_dim, h) + + self.q = nn.Sequential(Subsample(stride, resolution), Linear_BN(in_dim, nh_kd)) + self.proj = nn.Sequential(activation(), Linear_BN(self.dh, out_dim)) + + self.stride = stride + self.resolution = resolution + points = list(itertools.product(range(resolution), range(resolution))) + points_ = list(itertools.product(range(resolution_), range(resolution_))) + + N = len(points) + N_ = len(points_) + attention_offsets = {} + idxs = [] + i = 0 + j = 0 + for p1 in points_: + i += 1 + for p2 in points: + j += 1 + size = 1 + offset = (abs(p1[0] * stride - p2[0] + (size - 1) / 2), abs(p1[1] * stride - p2[1] + (size - 1) / 2)) + if offset not in attention_offsets: + attention_offsets[offset] = len(attention_offsets) + idxs.append(attention_offsets[offset]) + self.attention_biases = self.create_parameter(shape=(num_heads, len(attention_offsets)), + default_initializer=zeros_, + attr=paddle.ParamAttr(regularizer=L2Decay(0.0))) + + tensor_idxs_ = paddle.to_tensor(idxs, dtype='int64') + self.register_buffer('attention_bias_idxs', 
paddle.reshape(tensor_idxs_, [N_, N])) + + @paddle.no_grad() + def train(self, mode=True): + if mode: + super().train() + else: + super().eval() + if mode and hasattr(self, 'ab'): + del self.ab + else: + self.ab = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + + def forward(self, x): + self.training = True + B, N, C = x.shape + kv = self.kv(x) + kv = paddle.reshape(kv, [B, N, self.num_heads, -1]) + k, v = paddle.split(kv, [self.key_dim, self.d], axis=3) + k = paddle.transpose(k, perm=[0, 2, 1, 3]) # BHNC + v = paddle.transpose(v, perm=[0, 2, 1, 3]) + q = paddle.reshape(self.q(x), [B, self.resolution_2, self.num_heads, self.key_dim]) + q = paddle.transpose(q, perm=[0, 2, 1, 3]) + + if self.training: + attention_biases = cal_attention_biases(self.attention_biases, self.attention_bias_idxs) + else: + attention_biases = self.ab + + attn = (paddle.matmul(q, paddle.transpose(k, perm=[0, 1, 3, 2]))) * self.scale + attention_biases + attn = F.softmax(attn) + + x = paddle.reshape(paddle.transpose(paddle.matmul(attn, v), perm=[0, 2, 1, 3]), [B, -1, self.dh]) + x = self.proj(x) + return x + + +class LeViT(nn.Layer): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=[192], + key_dim=[64], + depth=[12], + num_heads=[3], + attn_ratio=[2], + mlp_ratio=[2], + hybrid_backbone=None, + down_ops=[], + attention_activation=nn.Hardswish, + mlp_activation=nn.Hardswish, + distillation=True, + drop_path=0): + super().__init__() + + self.class_num = class_num + self.num_features = embed_dim[-1] + self.embed_dim = embed_dim + self.distillation = distillation + + self.patch_embed = hybrid_backbone + + self.blocks = [] + down_ops.append(['']) + resolution = img_size // patch_size + for i, (ed, kd, dpth, nh, ar, mr, + do) in enumerate(zip(embed_dim, key_dim, depth, num_heads, attn_ratio, mlp_ratio, down_ops)): + for _ in range(dpth): + self.blocks.append( + Residual( + Attention( + ed, + kd, + nh, + attn_ratio=ar, + activation=attention_activation, + resolution=resolution, + ), drop_path)) + if mr > 0: + h = int(ed * mr) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(ed, h), + mlp_activation(), + Linear_BN(h, ed, bn_weight_init=0), + ), drop_path)) + if do[0] == 'Subsample': + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + resolution_ = (resolution - 1) // do[5] + 1 + self.blocks.append( + AttentionSubsample(*embed_dim[i:i + 2], + key_dim=do[1], + num_heads=do[2], + attn_ratio=do[3], + activation=attention_activation, + stride=do[5], + resolution=resolution, + resolution_=resolution_)) + resolution = resolution_ + if do[4] > 0: # mlp_ratio + h = int(embed_dim[i + 1] * do[4]) + self.blocks.append( + Residual( + nn.Sequential( + Linear_BN(embed_dim[i + 1], h), + mlp_activation(), + Linear_BN(h, embed_dim[i + 1], bn_weight_init=0), + ), drop_path)) + self.blocks = nn.Sequential(*self.blocks) + + # Classifier head + self.head = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + if distillation: + self.head_dist = BN_Linear(embed_dim[-1], class_num) if class_num > 0 else Identity() + + def forward(self, x): + x = self.patch_embed(x) + x = x.flatten(2) + x = paddle.transpose(x, perm=[0, 2, 1]) + x = self.blocks(x) + x = x.mean(1) + + x = paddle.reshape(x, [-1, self.embed_dim[-1]]) + if self.distillation: + x = self.head(x), self.head_dist(x) + if not self.training: + x = (x[0] + x[1]) / 2 + else: + x = self.head(x) + 
return x + + +def model_factory(C, D, X, N, drop_path, class_num, distillation): + embed_dim = [int(x) for x in C.split('_')] + num_heads = [int(x) for x in N.split('_')] + depth = [int(x) for x in X.split('_')] + act = nn.Hardswish + model = LeViT( + patch_size=16, + embed_dim=embed_dim, + num_heads=num_heads, + key_dim=[D] * 3, + depth=depth, + attn_ratio=[2, 2, 2], + mlp_ratio=[2, 2, 2], + down_ops=[ + #('Subsample',key_dim, num_heads, attn_ratio, mlp_ratio, stride) + ['Subsample', D, embed_dim[0] // D, 4, 2, 2], + ['Subsample', D, embed_dim[1] // D, 4, 2, 2], + ], + attention_activation=act, + mlp_activation=act, + hybrid_backbone=b16(embed_dim[0], activation=act), + class_num=class_num, + drop_path=drop_path, + distillation=distillation) + + return model + + +specification = { + 'LeViT_128S': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_6_8', + 'X': '2_3_4', + 'drop_path': 0 + }, + 'LeViT_128': { + 'C': '128_256_384', + 'D': 16, + 'N': '4_8_12', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_192': { + 'C': '192_288_384', + 'D': 32, + 'N': '3_5_6', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_256': { + 'C': '256_384_512', + 'D': 32, + 'N': '4_6_8', + 'X': '4_4_4', + 'drop_path': 0 + }, + 'LeViT_384': { + 'C': '384_512_768', + 'D': 32, + 'N': '6_9_12', + 'X': '4_4_4', + 'drop_path': 0.1 + }, +} + + +def LeViT_384(**kwargs): + model = model_factory(**specification['LeViT_384'], class_num=1000, distillation=False) + return model diff --git a/modules/image/classification/levit_384_imagenet/module.py b/modules/image/classification/levit_384_imagenet/module.py new file mode 100644 index 000000000..790a66d5f --- /dev/null +++ b/modules/image/classification/levit_384_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
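Between stages, the `Subsample` layer in the model code above drops every other row and column of the token grid before the query projection, shrinking a 14x14 grid to 7x7. The equivalent tensor manipulation in isolation:

    import paddle

    B, resolution, C, stride = 1, 14, 384, 2
    x = paddle.randn([B, resolution * resolution, C])         # token sequence
    x = paddle.reshape(x, [B, resolution, resolution, C])
    x = x[:, 0:resolution:stride, 0:resolution:stride]        # keep every other row/col
    x = paddle.reshape(x, [B, -1, C])
    print(x.shape)                                            # [1, 49, 384]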
+import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import LeViT_384 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="levit_384_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class LeViT_384_ImageNet: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'LeViT_384.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'LeViT_384_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = LeViT_384() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/levit_384_imagenet/processor.py b/modules/image/classification/levit_384_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/levit_384_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
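+ # Descriptive note: `scale` defaults to 1/255 so uint8 pixels land in [0, 1],
+ # after which the ImageNet mean/std are subtracted and divided; `order`
+ # ('chw' or 'hwc') only controls how mean/std are reshaped for broadcasting.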
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/levit_384_imagenet/utils.py b/modules/image/classification/levit_384_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/levit_384_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
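Editor's note: the `override`/`override_config` helpers defined below walk dotted keys such as `VALID.transforms.1.ResizeImage.resize_short=300` into a nested config of dicts and lists. The following is a minimal, self-contained sketch of that idea, not the module's implementation; `apply_override` and the sample config are illustrative names only.

```python
import ast


def apply_override(cfg, dotted_key, value):
    """Resolve a dotted key against nested dicts/lists and set the leaf value."""
    keys = dotted_key.split(".")
    node = cfg
    for k in keys[:-1]:
        node = node[int(k)] if isinstance(node, list) else node[k]
    try:
        value = ast.literal_eval(value)  # "300" -> 300; non-literals stay strings
    except (ValueError, SyntaxError):
        pass
    last = keys[-1]
    if isinstance(node, list):
        node[int(last)] = value
    else:
        node[last] = value
    return cfg


if __name__ == "__main__":
    cfg = {"Infer": {"transforms": [{"ResizeImage": {"resize_short": 256}}]}}
    apply_override(cfg, "Infer.transforms.0.ResizeImage.resize_short", "300")
    print(cfg["Infer"]["transforms"][0]["ResizeImage"]["resize_short"])  # 300
```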
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x0_25_imagenet/README.md b/modules/image/classification/pplcnet_x0_25_imagenet/README.md new file mode 100644 index 000000000..1444f3e47 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_25_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x0_25_imagenet + +|模型名称|pplcnet_x0_25_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|5 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x0.25下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + 
- paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x0_25_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x0_25_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x0_25_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x0_25_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x0_25_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x0_25_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x0_25_imagenet/model.py b/modules/image/classification/pplcnet_x0_25_imagenet/model.py new file mode 100644 index 000000000..071131b1b --- /dev/null +++ b/modules/image/classification/pplcnet_x0_25_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
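+ # As written, 'return_stages' is cleared on the next line, so an explicit
+ # 'return_patterns' takes precedence when both arguments are supplied.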
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x0_25(**kwargs): + model = PPLCNet(scale=0.25, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x0_25_imagenet/module.py b/modules/image/classification/pplcnet_x0_25_imagenet/module.py new file mode 100644 index 000000000..a4f987863 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_25_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x0_25 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x0_25_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x0_5: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x0_25.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x0_25_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x0_25() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x0_25_imagenet/processor.py b/modules/image/classification/pplcnet_x0_25_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_25_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x0_25_imagenet/utils.py b/modules/image/classification/pplcnet_x0_25_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_25_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
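Editor's note: `parse_config`/`AttrDict` below load a YAML file into a dict whose keys can also be read as attributes (e.g. `config.Infer.transforms`). A minimal, hypothetical sketch of that idiom follows; `DotDict` and `load_cfg` are illustrative names, only one nesting level is wrapped, and this is not the module's implementation.

```python
import yaml


class DotDict(dict):
    """Dict that also exposes its keys as attributes: cfg.Infer == cfg['Infer']."""
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__


def load_cfg(text):
    raw = yaml.safe_load(text)
    return DotDict({k: DotDict(v) if isinstance(v, dict) else v for k, v in raw.items()})


if __name__ == "__main__":
    cfg = load_cfg("Infer:\n  batch_size: 1\n  transforms: []\n")
    print(cfg.Infer.batch_size)  # 1
```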
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x0_35_imagenet/README.md b/modules/image/classification/pplcnet_x0_35_imagenet/README.md new file mode 100644 index 000000000..021c52b8e --- /dev/null +++ b/modules/image/classification/pplcnet_x0_35_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x0_35_imagenet + +|模型名称|pplcnet_x0_35_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|6 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x0.35下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + 
- paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x0_35_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x0_35_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x0_35_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x0_35_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x0_35_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x0_35_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x0_35_imagenet/model.py b/modules/image/classification/pplcnet_x0_35_imagenet/model.py new file mode 100644 index 000000000..85580ae9f --- /dev/null +++ b/modules/image/classification/pplcnet_x0_35_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x0_35(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=0.35, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x0_35_imagenet/module.py b/modules/image/classification/pplcnet_x0_35_imagenet/module.py new file mode 100644 index 000000000..acd31f026 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_35_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x0_35 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x0_35_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x0_35: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x0_35.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x0_35_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x0_35() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x0_35_imagenet/processor.py b/modules/image/classification/pplcnet_x0_35_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_35_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x0_35_imagenet/utils.py b/modules/image/classification/pplcnet_x0_35_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_35_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x0_5_imagenet/README.md b/modules/image/classification/pplcnet_x0_5_imagenet/README.md new file mode 100644 index 000000000..3efd7cd06 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_5_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x0_5_imagenet + +|模型名称|pplcnet_x0_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|7 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x0.5下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + - 
paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x0_5_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x0_5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x0_5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x0_5_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x0_5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x0_5_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x0_5_imagenet/model.py b/modules/image/classification/pplcnet_x0_5_imagenet/model.py new file mode 100644 index 000000000..8c6a399bc --- /dev/null +++ b/modules/image/classification/pplcnet_x0_5_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
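+            # 'return_stages' is discarded at this point, so the 'return_patterns'
+            # argument passed by the caller is the one that takes effect.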
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x0_5(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=0.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x0_5_imagenet/module.py b/modules/image/classification/pplcnet_x0_5_imagenet/module.py new file mode 100644 index 000000000..05ac64722 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_5_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x0_5 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x0_5_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x0_5: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x0_5.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x0_5_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x0_5() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x0_5_imagenet/processor.py b/modules/image/classification/pplcnet_x0_5_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_5_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x0_5_imagenet/utils.py b/modules/image/classification/pplcnet_x0_5_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_5_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
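+# YAML configuration helpers for this module. `get_config` (the only name in
+# __all__) parses a config file such as PPLCNet_x0_5.yaml into an AttrDict
+# whose keys can also be read as attributes, and `override_config` applies
+# optional "key0.key1.idx.key2=value" style overrides on top of it.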
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x0_75_imagenet/README.md b/modules/image/classification/pplcnet_x0_75_imagenet/README.md new file mode 100644 index 000000000..72c8c072d --- /dev/null +++ b/modules/image/classification/pplcnet_x0_75_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x0_75_imagenet + +|模型名称|pplcnet_x0_75_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|9 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x0.75下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + 
- paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x0_75_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x0_75_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x0_75_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x0_75_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x0_75_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x0_75_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x0_75_imagenet/model.py b/modules/image/classification/pplcnet_x0_75_imagenet/model.py new file mode 100644 index 000000000..df546e13b --- /dev/null +++ b/modules/image/classification/pplcnet_x0_75_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x0_75(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=0.75, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x0_75_imagenet/module.py b/modules/image/classification/pplcnet_x0_75_imagenet/module.py new file mode 100644 index 000000000..7ce6c2eac --- /dev/null +++ b/modules/image/classification/pplcnet_x0_75_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x0_75 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x0_75_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x0_75: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x0_75.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x0_75_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x0_75() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x0_75_imagenet/processor.py b/modules/image/classification/pplcnet_x0_75_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_75_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
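+# Same pre/post-processing helpers as in pplcnet_x0_5_imagenet: the operators
+# returned by `create_operators` implement the "Infer.transforms" pipeline of
+# PPLCNet_x0_75.yaml, and `Topk` converts the model output into class ids,
+# scores and label names consumed by module.py.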
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x0_75_imagenet/utils.py b/modules/image/classification/pplcnet_x0_75_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x0_75_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
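+# YAML configuration helpers, identical to the pplcnet_x0_5_imagenet copy:
+# `get_config` loads PPLCNet_x0_75.yaml into an attribute-accessible AttrDict,
+# with optional "key0.key1.idx.key2=value" overrides via `override_config`.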
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x1_0_imagenet/README.md b/modules/image/classification/pplcnet_x1_0_imagenet/README.md new file mode 100644 index 000000000..22dc1b235 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_0_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x1_0_imagenet + +|模型名称|pplcnet_x1_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|11 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x1.0下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + - 
paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x1_0_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x1_0_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x1_0_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x1_0_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x1_0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x1_0_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x1_0_imagenet/model.py b/modules/image/classification/pplcnet_x1_0_imagenet/model.py new file mode 100644 index 000000000..a69f326d8 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_0_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
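+
+        Examples:
+            # a hypothetical call, assuming a model whose stages include a sublayer
+            # named 'blocks4' (as in MODEL_STAGES_PATTERN below); every sublayer
+            # registered after it is replaced with Identity
+            net.stop_after(stop_layer_name="blocks4")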
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x1_0(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=1.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x1_0_imagenet/module.py b/modules/image/classification/pplcnet_x1_0_imagenet/module.py new file mode 100644 index 000000000..3119f49bb --- /dev/null +++ b/modules/image/classification/pplcnet_x1_0_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x1_0 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x1_0_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x1_0: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x1_0.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x1_0_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x1_0() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
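+
+        Example (equivalent CLI invocation; the image path is a placeholder):
+            hub run pplcnet_x1_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5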
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x1_0_imagenet/processor.py b/modules/image/classification/pplcnet_x1_0_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_0_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
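+# Preprocessing operators (DecodeImage, ResizeImage, CropImage, NormalizeImage,
+# ToCHWImage, ...) and the Topk postprocessor used by pplcnet_x1_0_imagenet;
+# they are instantiated from the YAML inference config via create_operators().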
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
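+        # when channel_num is 4, __call__ pads an extra all-zero channel after normalization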
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x1_0_imagenet/utils.py b/modules/image/classification/pplcnet_x1_0_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_0_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
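+# YAML configuration helpers for this module (see get_config below). A hypothetical
+# call, with an illustrative override key that may not exist in the shipped config:
+#     config = get_config('PPLCNet_x1_0.yaml', overrides=['Global.use_gpu=False'], show=False)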
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x1_5_imagenet/README.md b/modules/image/classification/pplcnet_x1_5_imagenet/README.md new file mode 100644 index 000000000..eb8342eff --- /dev/null +++ b/modules/image/classification/pplcnet_x1_5_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x1_5_imagenet + +|模型名称|pplcnet_x1_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|17 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x1.5下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + - 
paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x1_5_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x1_5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x1_5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x1_5_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x1_5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x1_5_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x1_5_imagenet/model.py b/modules/image/classification/pplcnet_x1_5_imagenet/model.py new file mode 100644 index 000000000..085bb5668 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_5_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
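+
+        Examples:
+            # a hypothetical call, assuming a model whose stages include a sublayer
+            # named 'blocks4' (as in MODEL_STAGES_PATTERN below); every sublayer
+            # registered after it is replaced with Identity
+            net.stop_after(stop_layer_name="blocks4")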
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x1_5(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=1.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x1_5_imagenet/module.py b/modules/image/classification/pplcnet_x1_5_imagenet/module.py new file mode 100644 index 000000000..25f258db9 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_5_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x1_5 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x1_5_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x1_5: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x1_5.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x1_5_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x1_5() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
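+
+        Example (equivalent CLI invocation; the image path is a placeholder):
+            hub run pplcnet_x1_5_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5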
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x1_5_imagenet/processor.py b/modules/image/classification/pplcnet_x1_5_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_5_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
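+# Preprocessing operators (DecodeImage, ResizeImage, CropImage, NormalizeImage,
+# ToCHWImage, ...) and the Topk postprocessor used by pplcnet_x1_5_imagenet;
+# they are instantiated from the YAML inference config via create_operators().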
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
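+        # This operator computes (img * scale - mean) / std: 'scale' defaults to
+        # 1/255 and mean/std default to the ImageNet channel statistics, reshaped
+        # so that they broadcast over CHW or HWC input depending on 'order'.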
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x1_5_imagenet/utils.py b/modules/image/classification/pplcnet_x1_5_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x1_5_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
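The utils.py added below loads the module's YAML file into attribute-style dicts and supports dotted `key=value` overrides. A small sketch of the intended call, run from the module directory; the override key path is illustrative and assumes the transforms list in PPLCNet_x1_5.yaml has a ResizeImage entry at index 1:

```python
from utils import get_config  # helper defined below; import path assumed

# Load the packaged config and override one nested field, following the
# 'key0.key1.idx.key2=value' convention documented in override_config().
cfg = get_config('PPLCNet_x1_5.yaml',
                 overrides=['Infer.transforms.1.ResizeImage.resize_short=256'])
print(cfg['Infer']['PostProcess'])
```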
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x2_0_imagenet/README.md b/modules/image/classification/pplcnet_x2_0_imagenet/README.md new file mode 100644 index 000000000..61c681d00 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_0_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x2_0_imagenet + +|模型名称|pplcnet_x2_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|24 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x2.0下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + - 
paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x2_0_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x2_0_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x2_0_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x2_0_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x2_0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x2_0_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x2_0_imagenet/model.py b/modules/image/classification/pplcnet_x2_0_imagenet/model.py new file mode 100644 index 000000000..a3fd8364a --- /dev/null +++ b/modules/image/classification/pplcnet_x2_0_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
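+            # The warning text above is composed but never printed in this vendored
+            # copy; when both arguments are given, 'return_stages' is cleared and
+            # 'return_patterns' is used further down.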
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
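+        # The message is only constructed here; failure is signalled to the caller
+        # by returning None.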
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x2_0(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=2.0, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x2_0_imagenet/module.py b/modules/image/classification/pplcnet_x2_0_imagenet/module.py new file mode 100644 index 000000000..d67d80800 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_0_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x2_0 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x2_0_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x2_0: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x2_0.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x2_0_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x2_0() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x2_0_imagenet/processor.py b/modules/image/classification/pplcnet_x2_0_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_0_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
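processor.py below is shared verbatim with the x1_5 module. Its `Topk` post-processor converts raw logits into the `class_ids`/`scores`/`label_names` dicts that `classification` returns. A small behavioural sketch on dummy logits, with no label file so `label_names` stays empty (the import path is assumed):

```python
import numpy as np
import paddle

from processor import Topk  # defined later in this file; import path assumed

topk = Topk(topk=2)  # no class_id_map_file, so no label names are attached
logits = paddle.to_tensor(np.array([[0.1, 2.0, 0.3, 1.5]], dtype='float32'))
print(topk(logits))
# e.g. [{'class_ids': [1, 3], 'scores': [0.51579, 0.31284], 'label_names': []}]
```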
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
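+        # Normalisation is (img * scale - mean) / std, with scale defaulting to
+        # 1/255 and mean/std defaulting to the ImageNet channel statistics; the
+        # reshape below lets them broadcast over CHW or HWC data per 'order'.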
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x2_0_imagenet/utils.py b/modules/image/classification/pplcnet_x2_0_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_0_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
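utils.py below is another verbatim copy. Its `AttrDict` wrapper lets nested YAML values be read either as attributes or as ordinary dict items, matching the `config['Infer']['PostProcess']` lookups in module.py. A tiny illustration; the keys are made up for the example and the import path is assumed:

```python
from utils import AttrDict  # defined below; import path assumed

# Nested AttrDicts support both access styles.
cfg = AttrDict({'Infer': AttrDict({'PostProcess': AttrDict({'topk': 5})})})

print(cfg.Infer.PostProcess.topk)           # attribute-style access -> 5
print(cfg['Infer']['PostProcess']['topk'])  # plain dict access -> 5
```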
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config diff --git a/modules/image/classification/pplcnet_x2_5_imagenet/README.md b/modules/image/classification/pplcnet_x2_5_imagenet/README.md new file mode 100644 index 000000000..a7099ebce --- /dev/null +++ b/modules/image/classification/pplcnet_x2_5_imagenet/README.md @@ -0,0 +1,132 @@ +# pplcnet_x2_5_imagenet + +|模型名称|pplcnet_x2_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PPLCNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|34 MB| +|最新更新日期|2022-04-02| +|数据指标|Acc| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - PP-LCNet是百度针对Intel CPU 设备以及其加速库 MKLDNN 设计的特定骨干网络 ,比起其他的轻量级的 SOTA 模型,该骨干网络可以在不增加推理时间的情况下,进一步提升模型的性能,最终大幅度超越现有的 SOTA 模型。该模型为模型规模参数scale为x2.5下的PP-LCNet模型,关于模型结构的更多信息,可参考[论文](https://arxiv.org/pdf/2109.15099.pdf)。 + +## 二、安装 + +- ### 1、环境依赖 + + - 
paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pplcnet_x2_5_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pplcnet_x2_5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pplcnet_x2_5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 包括'class_ids'(种类索引), 'scores'(置信度) 和 'label_names'(种类名称) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pplcnet_x2_5_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"\} + url = "http://127.0.0.1:8866/predict/pplcnet_x2_5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pplcnet_x2_5_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/pplcnet_x2_5_imagenet/model.py b/modules/image/classification/pplcnet_x2_5_imagenet/model.py new file mode 100644 index 000000000..b13957701 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_5_imagenet/model.py @@ -0,0 +1,478 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Any +from typing import Callable +from typing import Dict +from typing import List +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn.initializer import KaimingNormal +from paddle.regularizer import L2Decay + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, inputs): + return inputs + + +class TheseusLayer(nn.Layer): + + def __init__(self, *args, **kwargs): + super(TheseusLayer, self).__init__() + self.res_dict = {} + self.res_name = self.full_name() + self.pruner = None + self.quanter = None + + def _return_dict_hook(self, layer, input, output): + res_dict = {"output": output} + # 'list' is needed to avoid error raised by popping self.res_dict + for res_key in list(self.res_dict): + # clear the res_dict because the forward process may change according to input + res_dict[res_key] = self.res_dict.pop(res_key) + return res_dict + + def init_res(self, stages_pattern, return_patterns=None, return_stages=None): + if return_patterns and return_stages: + msg = f"The 'return_patterns' would be ignored when 'return_stages' is set." 
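+            # As in the other PPLCNet copies, this warning string is assembled but not
+            # emitted; 'return_stages' is simply dropped in favour of 'return_patterns'.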
+ return_stages = None + + if return_stages is True: + return_patterns = stages_pattern + # return_stages is int or bool + if type(return_stages) is int: + return_stages = [return_stages] + if isinstance(return_stages, list): + if max(return_stages) > len(stages_pattern) or min(return_stages) < 0: + msg = f"The 'return_stages' set error. Illegal value(s) have been ignored. The stages' pattern list is {stages_pattern}." + return_stages = [val for val in return_stages if val >= 0 and val < len(stages_pattern)] + return_patterns = [stages_pattern[i] for i in return_stages] + + if return_patterns: + self.update_res(return_patterns) + + def replace_sub(self, *args, **kwargs) -> None: + msg = "The function 'replace_sub()' is deprecated, please use 'upgrade_sublayer()' instead." + raise DeprecationWarning(msg) + + def upgrade_sublayer(self, layer_name_pattern: Union[str, List[str]], + handle_func: Callable[[nn.Layer, str], nn.Layer]) -> Dict[str, nn.Layer]: + """use 'handle_func' to modify the sub-layer(s) specified by 'layer_name_pattern'. + + Args: + layer_name_pattern (Union[str, List[str]]): The name of layer to be modified by 'handle_func'. + handle_func (Callable[[nn.Layer, str], nn.Layer]): The function to modify target layer specified by 'layer_name_pattern'. The formal params are the layer(nn.Layer) and pattern(str) that is (a member of) layer_name_pattern (when layer_name_pattern is List type). And the return is the layer processed. + + Returns: + Dict[str, nn.Layer]: The key is the pattern and corresponding value is the result returned by 'handle_func()'. + + Examples: + + from paddle import nn + import paddleclas + + def rep_func(layer: nn.Layer, pattern: str): + new_layer = nn.Conv2D( + in_channels=layer._in_channels, + out_channels=layer._out_channels, + kernel_size=5, + padding=2 + ) + return new_layer + + net = paddleclas.MobileNetV1() + res = net.replace_sub(layer_name_pattern=["blocks[11].depthwise_conv.conv", "blocks[12].depthwise_conv.conv"], handle_func=rep_func) + print(res) + # {'blocks[11].depthwise_conv.conv': the corresponding new_layer, 'blocks[12].depthwise_conv.conv': the corresponding new_layer} + """ + + if not isinstance(layer_name_pattern, list): + layer_name_pattern = [layer_name_pattern] + + hit_layer_pattern_list = [] + for pattern in layer_name_pattern: + # parse pattern to find target layer and its parent + layer_list = parse_pattern_str(pattern=pattern, parent_layer=self) + if not layer_list: + continue + sub_layer_parent = layer_list[-2]["layer"] if len(layer_list) > 1 else self + + sub_layer = layer_list[-1]["layer"] + sub_layer_name = layer_list[-1]["name"] + sub_layer_index = layer_list[-1]["index"] + + new_sub_layer = handle_func(sub_layer, pattern) + + if sub_layer_index: + getattr(sub_layer_parent, sub_layer_name)[sub_layer_index] = new_sub_layer + else: + setattr(sub_layer_parent, sub_layer_name, new_sub_layer) + + hit_layer_pattern_list.append(pattern) + return hit_layer_pattern_list + + def stop_after(self, stop_layer_name: str) -> bool: + """stop forward and backward after 'stop_layer_name'. + + Args: + stop_layer_name (str): The name of layer that stop forward and backward after this layer. + + Returns: + bool: 'True' if successful, 'False' otherwise. 
+ """ + + layer_list = parse_pattern_str(stop_layer_name, self) + if not layer_list: + return False + + parent_layer = self + for layer_dict in layer_list: + name, index = layer_dict["name"], layer_dict["index"] + if not set_identity(parent_layer, name, index): + msg = f"Failed to set the layers that after stop_layer_name('{stop_layer_name}') to IdentityLayer. The error layer's name is '{name}'." + return False + parent_layer = layer_dict["layer"] + + return True + + def update_res(self, return_patterns: Union[str, List[str]]) -> Dict[str, nn.Layer]: + """update the result(s) to be returned. + + Args: + return_patterns (Union[str, List[str]]): The name of layer to return output. + + Returns: + Dict[str, nn.Layer]: The pattern(str) and corresponding layer(nn.Layer) that have been set successfully. + """ + + # clear res_dict that could have been set + self.res_dict = {} + + class Handler(object): + + def __init__(self, res_dict): + # res_dict is a reference + self.res_dict = res_dict + + def __call__(self, layer, pattern): + layer.res_dict = self.res_dict + layer.res_name = pattern + if hasattr(layer, "hook_remove_helper"): + layer.hook_remove_helper.remove() + layer.hook_remove_helper = layer.register_forward_post_hook(save_sub_res_hook) + return layer + + handle_func = Handler(self.res_dict) + + hit_layer_pattern_list = self.upgrade_sublayer(return_patterns, handle_func=handle_func) + + if hasattr(self, "hook_remove_helper"): + self.hook_remove_helper.remove() + self.hook_remove_helper = self.register_forward_post_hook(self._return_dict_hook) + + return hit_layer_pattern_list + + +def save_sub_res_hook(layer, input, output): + layer.res_dict[layer.res_name] = output + + +def set_identity(parent_layer: nn.Layer, layer_name: str, layer_index: str = None) -> bool: + """set the layer specified by layer_name and layer_index to Indentity. + + Args: + parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index. + layer_name (str): The name of target layer to be set to Indentity. + layer_index (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. + + Returns: + bool: True if successfully, False otherwise. + """ + + stop_after = False + for sub_layer_name in parent_layer._sub_layers: + if stop_after: + parent_layer._sub_layers[sub_layer_name] = Identity() + continue + if sub_layer_name == layer_name: + stop_after = True + + if layer_index and stop_after: + stop_after = False + for sub_layer_index in parent_layer._sub_layers[layer_name]._sub_layers: + if stop_after: + parent_layer._sub_layers[layer_name][sub_layer_index] = Identity() + continue + if layer_index == sub_layer_index: + stop_after = True + + return stop_after + + +def parse_pattern_str(pattern: str, parent_layer: nn.Layer) -> Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: + """parse the string type pattern. + + Args: + pattern (str): The pattern to discribe layer. + parent_layer (nn.Layer): The root layer relative to the pattern. + + Returns: + Union[None, List[Dict[str, Union[nn.Layer, str, None]]]]: None if failed. If successfully, the members are layers parsed in order: + [ + {"layer": first layer, "name": first layer's name parsed, "index": first layer's index parsed if exist}, + {"layer": second layer, "name": second layer's name parsed, "index": second layer's index parsed if exist}, + ... + ] + """ + + pattern_list = pattern.split(".") + if not pattern_list: + msg = f"The pattern('{pattern}') is illegal. Please check and retry." 
+ return None + + layer_list = [] + while len(pattern_list) > 0: + if '[' in pattern_list[0]: + target_layer_name = pattern_list[0].split('[')[0] + target_layer_index = pattern_list[0].split('[')[1].split(']')[0] + else: + target_layer_name = pattern_list[0] + target_layer_index = None + + target_layer = getattr(parent_layer, target_layer_name, None) + + if target_layer is None: + msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." + return None + + if target_layer_index and target_layer: + if int(target_layer_index) < 0 or int(target_layer_index) >= len(target_layer): + msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." + return None + + target_layer = target_layer[target_layer_index] + + layer_list.append({"layer": target_layer, "name": target_layer_name, "index": target_layer_index}) + + pattern_list = pattern_list[1:] + parent_layer = target_layer + return layer_list + + +MODEL_STAGES_PATTERN = {"PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"]} + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. +# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + #k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False], + [5, 256, 256, 1, False], [5, 256, 256, 1, False]], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]] +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(TheseusLayer): + + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D(in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False) + + self.bn = BatchNorm(num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0))) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(TheseusLayer): + + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer(num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer(num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(TheseusLayer): + + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D(in_channels=channel, out_channels=channel // reduction, kernel_size=1, 
stride=1, padding=0) + self.relu = nn.ReLU() + self.conv2 = Conv2D(in_channels=channel // reduction, out_channels=channel, kernel_size=1, stride=1, padding=0) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(TheseusLayer): + + def __init__(self, + stages_pattern, + scale=1.0, + class_num=1000, + dropout_prob=0.2, + class_expand=1280, + return_patterns=None, + return_stages=None): + super().__init__() + self.scale = scale + self.class_expand = class_expand + + self.conv1 = ConvBNLayer(num_channels=3, filter_size=3, num_filters=make_divisible(16 * scale), stride=2) + + self.blocks2 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + ]) + + self.blocks3 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ]) + + self.blocks4 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ]) + + self.blocks5 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ]) + + self.blocks6 = nn.Sequential(*[ + DepthwiseSeparable(num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se) for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ]) + + self.avg_pool = AdaptiveAvgPool2D(1) + + self.last_conv = Conv2D(in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale), + out_channels=self.class_expand, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False) + + self.hardswish = nn.Hardswish() + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.flatten = nn.Flatten(start_axis=1, stop_axis=-1) + + self.fc = Linear(self.class_expand, class_num) + + super().init_res(stages_pattern, return_patterns=return_patterns, return_stages=return_stages) + + def forward(self, x): + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + x = self.blocks4(x) + x = self.blocks5(x) + x = self.blocks6(x) + + x = self.avg_pool(x) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +def PPLCNet_x2_5(pretrained=False, use_ssld=False, **kwargs): + model = PPLCNet(scale=2.5, stages_pattern=MODEL_STAGES_PATTERN["PPLCNet"], **kwargs) + return model diff --git a/modules/image/classification/pplcnet_x2_5_imagenet/module.py b/modules/image/classification/pplcnet_x2_5_imagenet/module.py new file mode 100644 index 000000000..479cf4a61 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_5_imagenet/module.py @@ -0,0 +1,154 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import copy +import os + +import cv2 +import numpy as np +import paddle +from skimage.io import imread +from skimage.transform import rescale +from skimage.transform import resize + +import paddlehub as hub +from .model import PPLCNet_x2_5 +from .processor import base64_to_cv2 +from .processor import create_operators +from .processor import Topk +from .utils import get_config +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pplcnet_x2_5_imagenet", + type="cv/classification", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class PPLcNet_x2_5: + + def __init__(self): + self.config = get_config(os.path.join(self.directory, 'PPLCNet_x2_5.yaml'), show=False) + self.label_path = os.path.join(self.directory, 'imagenet1k_label_list.txt') + self.pretrain_path = os.path.join(self.directory, 'PPLCNet_x2_5_pretrained.pdparams') + self.config['Infer']['PostProcess']['class_id_map_file'] = self.label_path + self.model = PPLCNet_x2_5() + param_state_dict = paddle.load(self.pretrain_path) + self.model.set_dict(param_state_dict) + self.preprocess_funcs = create_operators(self.config["Infer"]["transforms"]) + + def classification(self, + images: list = None, + paths: list = None, + batch_size: int = 1, + use_gpu: bool = False, + top_k: int = 1): + ''' + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. + + Returns: + res (list[dict]): The classfication results, each result dict contains key 'class_ids', 'scores' and 'label_names'. + ''' + postprocess_func = Topk(top_k, self.label_path) + inputs = [] + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + inputs.append(image) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + inputs.append(image) + + batch_data = [] + for idx, imagedata in enumerate(inputs): + for process in self.preprocess_funcs: + imagedata = process(imagedata) + batch_data.append(imagedata) + if len(batch_data) >= batch_size or idx == len(inputs) - 1: + batch_tensor = paddle.to_tensor(batch_data) + out = self.model(batch_tensor) + if isinstance(out, list): + out = out[0] + if isinstance(out, dict) and "logits" in out: + out = out["logits"] + if isinstance(out, dict) and "output" in out: + out = out["output"] + result = postprocess_func(out) + results.extend(result) + batch_data.clear() + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. 
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.classification(paths=[self.args.input_path], + use_gpu=self.args.use_gpu, + batch_size=self.args.batch_size, + top_k=self.args.top_k) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.classification(images=images_decode, **kwargs) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size') + self.arg_config_group.add_argument('--top_k', type=int, default=1, help='Return top k results.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/classification/pplcnet_x2_5_imagenet/processor.py b/modules/image/classification/pplcnet_x2_5_imagenet/processor.py new file mode 100644 index 000000000..40cab3917 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_5_imagenet/processor.py @@ -0,0 +1,374 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
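+# Overview of this file (descriptive note): it gathers the image preprocessing
+# operators used by the module (DecodeImage, ResizeImage, CropImage,
+# NormalizeImage, ToCHWImage, ColorJitter, etc.), the create_operators()
+# factory that instantiates them from the YAML config, and the Topk
+# post-processor that maps logits to class ids, scores and label names.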
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import base64 +import inspect +import math +import os +import random +import sys +from functools import partial + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F +import six +from paddle.vision.transforms import ColorJitter as RawColorJitter +from PIL import Image + + +def create_operators(params, class_num=None): + """ + create operators based on the config + + Args: + params(list): a dict list, used to create some operators + """ + assert isinstance(params, list), ('operator config should be a list') + ops = [] + current_module = sys.modules[__name__] + for operator in params: + assert isinstance(operator, dict) and len(operator) == 1, "yaml format error" + op_name = list(operator)[0] + param = {} if operator[op_name] is None else operator[op_name] + op_func = getattr(current_module, op_name) + if "class_num" in inspect.getfullargspec(op_func).args: + param.update({"class_num": class_num}) + op = op_func(**param) + ops.append(op) + + return ops + + +class UnifiedResize(object): + + def __init__(self, interpolation=None, backend="cv2"): + _cv2_interp_from_str = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'area': cv2.INTER_AREA, + 'bicubic': cv2.INTER_CUBIC, + 'lanczos': cv2.INTER_LANCZOS4 + } + _pil_interp_from_str = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + def _pil_resize(src, size, resample): + pil_img = Image.fromarray(src) + pil_img = pil_img.resize(size, resample) + return np.asarray(pil_img) + + if backend.lower() == "cv2": + if isinstance(interpolation, str): + interpolation = _cv2_interp_from_str[interpolation.lower()] + # compatible with opencv < version 4.4.0 + elif interpolation is None: + interpolation = cv2.INTER_LINEAR + self.resize_func = partial(cv2.resize, interpolation=interpolation) + elif backend.lower() == "pil": + if isinstance(interpolation, str): + interpolation = _pil_interp_from_str[interpolation.lower()] + self.resize_func = partial(_pil_resize, resample=interpolation) + else: + self.resize_func = cv2.resize + + def __call__(self, src, size): + return self.resize_func(src, size) + + +class OperatorParamError(ValueError): + """ OperatorParamError + """ + pass + + +class DecodeImage(object): + """ decode image """ + + def __init__(self, to_rgb=True, to_np=False, channel_first=False): + self.to_rgb = to_rgb + self.to_np = to_np # to numpy + self.channel_first = channel_first # only enabled when to_np is True + + def __call__(self, img): + if six.PY2: + assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" + else: + assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" + data = np.frombuffer(img, dtype='uint8') + img = cv2.imdecode(data, 1) + if self.to_rgb: + assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) + img = img[:, :, ::-1] + + if self.channel_first: + img = img.transpose((2, 0, 1)) + + return img + + +class ResizeImage(object): + """ resize image """ + + def __init__(self, size=None, resize_short=None, interpolation=None, backend="cv2"): + if resize_short is not None and resize_short > 0: + self.resize_short = resize_short + self.w = None + self.h = None + elif size is not None: + self.resize_short = None + self.w = size if type(size) is int else 
size[0] + self.h = size if type(size) is int else size[1] + else: + raise OperatorParamError("invalid params for ReisizeImage for '\ + 'both 'size' and 'resize_short' are None") + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + img_h, img_w = img.shape[:2] + if self.resize_short is not None: + percent = float(self.resize_short) / min(img_w, img_h) + w = int(round(img_w * percent)) + h = int(round(img_h * percent)) + else: + w = self.w + h = self.h + return self._resize_func(img, (w, h)) + + +class CropImage(object): + """ crop image """ + + def __init__(self, size): + if type(size) is int: + self.size = (size, size) + else: + self.size = size # (h, w) + + def __call__(self, img): + w, h = self.size + img_h, img_w = img.shape[:2] + w_start = (img_w - w) // 2 + h_start = (img_h - h) // 2 + + w_end = w_start + w + h_end = h_start + h + return img[h_start:h_end, w_start:w_end, :] + + +class RandCropImage(object): + """ random crop image """ + + def __init__(self, size, scale=None, ratio=None, interpolation=None, backend="cv2"): + if type(size) is int: + self.size = (size, size) # (h, w) + else: + self.size = size + + self.scale = [0.08, 1.0] if scale is None else scale + self.ratio = [3. / 4., 4. / 3.] if ratio is None else ratio + + self._resize_func = UnifiedResize(interpolation=interpolation, backend=backend) + + def __call__(self, img): + size = self.size + scale = self.scale + ratio = self.ratio + + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + img_h, img_w = img.shape[:2] + + bound = min((float(img_w) / img_h) / (w**2), (float(img_h) / img_w) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img_w * img_h * random.uniform(scale_min, scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img_w - w) + j = random.randint(0, img_h - h) + + img = img[j:j + h, i:i + w, :] + + return self._resize_func(img, size) + + +class RandFlipImage(object): + """ random flip image + flip_code: + 1: Flipped Horizontally + 0: Flipped Vertically + -1: Flipped Horizontally & Vertically + """ + + def __init__(self, flip_code=1): + assert flip_code in [-1, 0, 1], "flip_code should be a value in [-1, 0, 1]" + self.flip_code = flip_code + + def __call__(self, img): + if random.randint(0, 1) == 1: + return cv2.flip(img, self.flip_code) + else: + return img + + +class NormalizeImage(object): + """ normalize image such as substract mean, divide std + """ + + def __init__(self, scale=None, mean=None, std=None, order='chw', output_fp16=False, channel_num=3): + if isinstance(scale, str): + scale = eval(scale) + assert channel_num in [3, 4], "channel number of input image should be set to 3 or 4." 
+ self.channel_num = channel_num + self.output_dtype = 'float16' if output_fp16 else 'float32' + self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) + self.order = order + mean = mean if mean is not None else [0.485, 0.456, 0.406] + std = std if std is not None else [0.229, 0.224, 0.225] + + shape = (3, 1, 1) if self.order == 'chw' else (1, 1, 3) + self.mean = np.array(mean).reshape(shape).astype('float32') + self.std = np.array(std).reshape(shape).astype('float32') + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage" + + img = (img.astype('float32') * self.scale - self.mean) / self.std + + if self.channel_num == 4: + img_h = img.shape[1] if self.order == 'chw' else img.shape[0] + img_w = img.shape[2] if self.order == 'chw' else img.shape[1] + pad_zeros = np.zeros((1, img_h, img_w)) if self.order == 'chw' else np.zeros((img_h, img_w, 1)) + img = (np.concatenate((img, pad_zeros), axis=0) if self.order == 'chw' else np.concatenate( + (img, pad_zeros), axis=2)) + return img.astype(self.output_dtype) + + +class ToCHWImage(object): + """ convert hwc image to chw image + """ + + def __init__(self): + pass + + def __call__(self, img): + from PIL import Image + if isinstance(img, Image.Image): + img = np.array(img) + + return img.transpose((2, 0, 1)) + + +class ColorJitter(RawColorJitter): + """ColorJitter. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def __call__(self, img): + if not isinstance(img, Image.Image): + img = np.ascontiguousarray(img) + img = Image.fromarray(img) + img = super()._apply_image(img) + if isinstance(img, Image.Image): + img = np.asarray(img) + return img + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +class Topk(object): + + def __init__(self, topk=1, class_id_map_file=None): + assert isinstance(topk, (int, )) + self.class_id_map = self.parse_class_id_map(class_id_map_file) + self.topk = topk + + def parse_class_id_map(self, class_id_map_file): + if class_id_map_file is None: + return None + if not os.path.exists(class_id_map_file): + print( + "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!" 
+ ) + return None + + try: + class_id_map = {} + with open(class_id_map_file, "r") as fin: + lines = fin.readlines() + for line in lines: + partition = line.split("\n")[0].partition(" ") + class_id_map[int(partition[0])] = str(partition[-1]) + except Exception as ex: + print(ex) + class_id_map = None + return class_id_map + + def __call__(self, x, file_names=None, multilabel=False): + assert isinstance(x, paddle.Tensor) + if file_names is not None: + assert x.shape[0] == len(file_names) + x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) + x = x.numpy() + y = [] + for idx, probs in enumerate(x): + index = probs.argsort(axis=0)[-self.topk:][::-1].astype("int32") if not multilabel else np.where( + probs >= 0.5)[0].astype("int32") + clas_id_list = [] + score_list = [] + label_name_list = [] + for i in index: + clas_id_list.append(i.item()) + score_list.append(probs[i].item()) + if self.class_id_map is not None: + label_name_list.append(self.class_id_map[i.item()]) + result = { + "class_ids": clas_id_list, + "scores": np.around(score_list, decimals=5).tolist(), + } + if file_names is not None: + result["file_name"] = file_names[idx] + if label_name_list is not None: + result["label_names"] = label_name_list + y.append(result) + return y diff --git a/modules/image/classification/pplcnet_x2_5_imagenet/utils.py b/modules/image/classification/pplcnet_x2_5_imagenet/utils.py new file mode 100644 index 000000000..df2bc36b3 --- /dev/null +++ b/modules/image/classification/pplcnet_x2_5_imagenet/utils.py @@ -0,0 +1,129 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import copy +import os + +import yaml + +__all__ = ['get_config'] + + +class AttrDict(dict): + + def __getattr__(self, key): + return self[key] + + def __setattr__(self, key, value): + if key in self.__dict__: + self.__dict__[key] = value + else: + self[key] = value + + def __deepcopy__(self, content): + return copy.deepcopy(dict(self)) + + +def create_attr_dict(yaml_config): + from ast import literal_eval + for key, value in yaml_config.items(): + if type(value) is dict: + yaml_config[key] = value = AttrDict(value) + if isinstance(value, str): + try: + value = literal_eval(value) + except BaseException: + pass + if isinstance(value, AttrDict): + create_attr_dict(yaml_config[key]) + else: + yaml_config[key] = value + + +def parse_config(cfg_file): + """Load a config file into AttrDict""" + with open(cfg_file, 'r') as fopen: + yaml_config = AttrDict(yaml.load(fopen, Loader=yaml.SafeLoader)) + create_attr_dict(yaml_config) + return yaml_config + + +def override(dl, ks, v): + """ + Recursively replace dict of list + Args: + dl(dict or list): dict or list to be replaced + ks(list): list of keys + v(str): value to be replaced + """ + + def str2num(v): + try: + return eval(v) + except Exception: + return v + + assert isinstance(dl, (list, dict)), ("{} should be a list or a dict") + assert len(ks) > 0, ('lenght of keys should larger than 0') + if isinstance(dl, list): + k = str2num(ks[0]) + if len(ks) == 1: + assert k < len(dl), ('index({}) out of range({})'.format(k, dl)) + dl[k] = str2num(v) + else: + override(dl[k], ks[1:], v) + else: + if len(ks) == 1: + # assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl)) + if not ks[0] in dl: + print('A new filed ({}) detected!'.format(ks[0], dl)) + dl[ks[0]] = str2num(v) + else: + override(dl[ks[0]], ks[1:], v) + + +def override_config(config, options=None): + """ + Recursively override the config + Args: + config(dict): dict to be replaced + options(list): list of pairs(key0.key1.idx.key2=value) + such as: [ + 'topk=2', + 'VALID.transforms.1.ResizeImage.resize_short=300' + ] + Returns: + config(dict): replaced config + """ + if options is not None: + for opt in options: + assert isinstance(opt, str), ("option({}) should be a str".format(opt)) + assert "=" in opt, ("option({}) should contain a =" + "to distinguish between key and value".format(opt)) + pair = opt.split('=') + assert len(pair) == 2, ("there can be only a = in the option") + key, value = pair + keys = key.split('.') + override(config, keys, value) + return config + + +def get_config(fname, overrides=None, show=False): + """ + Read config from file + """ + assert os.path.exists(fname), ('config file({}) is not exist'.format(fname)) + config = parse_config(fname) + override_config(config, overrides) + return config From c327da380e178fa5468d4522e227d00a1bf28e28 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 19 Apr 2022 11:53:57 +0000 Subject: [PATCH 004/117] delete doc --- .../text/text_generation/ernie_tiny/README.md | 126 ------------- .../text_generation/ernie_tiny/README_en.md | 171 ------------------ 2 files changed, 297 deletions(-) delete mode 100644 modules/text/text_generation/ernie_tiny/README.md delete mode 100644 modules/text/text_generation/ernie_tiny/README_en.md diff --git a/modules/text/text_generation/ernie_tiny/README.md b/modules/text/text_generation/ernie_tiny/README.md deleted file mode 100644 index 15c654328..000000000 --- a/modules/text/text_generation/ernie_tiny/README.md +++ /dev/null @@ -1,126 +0,0 @@ -# ernie_tiny - -|模型名称|ernie_tiny| 
-| :--- | :---: | -|类别|图像 - 图像生成| -|网络|SPADEGenerator| -|数据集|coco_stuff| -|是否支持Fine-tuning|否| -|模型大小|74MB| -|最新更新日期|2021-12-14| -|数据指标|-| - - -## 一、模型基本信息 - -- ### 应用效果展示 - - 样例结果示例: -

- -
- -- ### 模型介绍 - - - 本模块采用一个像素风格迁移网络 Pix2PixHD,能够根据输入的语义分割标签生成照片风格的图片。为了解决模型归一化层导致标签语义信息丢失的问题,向 Pix2PixHD 的生成器网络中添加了 SPADE(Spatially-Adaptive - Normalization)空间自适应归一化模块,通过两个卷积层保留了归一化时训练的缩放与偏置参数的空间维度,以增强生成图片的质量。语义风格标签图像可以参考[coco_stuff数据集](https://github.com/nightrome/cocostuff)获取, 也可以通过[PaddleGAN repo中的该项目](https://github.com/PaddlePaddle/PaddleGAN/blob/87537ad9d4eeda17eaa5916c6a585534ab989ea8/docs/zh_CN/tutorials/photopen.md)来自定义生成图像进行体验。 - - - -## 二、安装 - -- ### 1、环境依赖 - - ppgan - -- ### 2、安装 - - - ```shell - $ hub install photopen - ``` - - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) - | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) - -## 三、模型API预测 - -- ### 1、命令行预测 - - - ```shell - # Read from a file - $ hub run photopen --input_path "/PATH/TO/IMAGE" - ``` - - 通过命令行方式实现图像生成模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - -- ### 2、预测代码示例 - - - ```python - import paddlehub as hub - - module = hub.Module(name="photopen") - input_path = ["/PATH/TO/IMAGE"] - # Read from a file - module.photo_transfer(paths=input_path, output_dir='./transfer_result/', use_gpu=True) - ``` - -- ### 3、API - - - ```python - photo_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True): - ``` - - 图像转换生成API。 - - - **参数** - - - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
- - paths (list\[str\]): 图片的路径;
- - output\_dir (str): 结果保存的路径;
- - use\_gpu (bool): 是否使用 GPU;
- - visualization(bool): 是否保存结果到本地文件夹 - - -## 四、服务部署 - -- PaddleHub Serving可以部署一个在线图像转换生成服务。 - -- ### 第一步:启动PaddleHub Serving - - - 运行启动命令: - - ```shell - $ hub serving start -m photopen - ``` - - - 这样就完成了一个图像转换生成的在线服务API的部署,默认端口号为8866。 - - - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -- ### 第二步:发送预测请求 - - - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - - - ```python - import requests - import json - import cv2 - import base64 - - - def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - # 发送HTTP请求 - data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} - headers = {"Content-type": "application/json"} - url = "http://127.0.0.1:8866/predict/photopen" - r = requests.post(url=url, headers=headers, data=json.dumps(data)) - - # 打印预测结果 - print(r.json()["results"]) - -## 五、更新历史 - -* 1.0.0 - - 初始发布 - - - ```shell - $ hub install ernie_tiny==1.1.0 - ``` diff --git a/modules/text/text_generation/ernie_tiny/README_en.md b/modules/text/text_generation/ernie_tiny/README_en.md deleted file mode 100644 index 373348799..000000000 --- a/modules/text/text_generation/ernie_tiny/README_en.md +++ /dev/null @@ -1,171 +0,0 @@ -# ernie_tiny - -|Module Name|ernie_tiny| -| :--- | :---: | -|Category|object detection| -|Network|faster_rcnn| -|Dataset|COCO2017| -|Fine-tuning supported or not|No| -|Module Size|161MB| -|Latest update date|2021-03-15| -|Data indicators|-| - - -## I.Basic Information - -- ### Application Effect Display - - Sample results: -

- -
-

- -- ### Module Introduction - - - Faster_RCNN is a two-stage detector, it consists of feature extraction, proposal, classification and refinement processes. This module is trained on COCO2017 dataset, and can be used for object detection. - - -## II.Installation - -- ### 1、Environmental Dependence - - - paddlepaddle >= 1.6.2 - - - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) - -- ### 2、Installation - - - ```shell - $ hub install faster_rcnn_resnet50_fpn_coco2017 - ``` - - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) - -## III.Module API Prediction - -- ### 1、Command line Prediction - - - ```shell - $ hub run faster_rcnn_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE" - ``` - - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - -- ### 2、Prediction Code Example - - - ```python - import paddlehub as hub - import cv2 - - object_detector = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017") - result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) - # or - # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) - ``` - -- ### 3、API - - - ```python - def object_detection(paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True) - ``` - - - Detection API, detect positions of all objects in image - - - **Parameters** - - - paths (list[str]): image path; - - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; - - batch_size (int): the size of batch; - - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** - - output_dir (str): save path of images; - - score\_thresh (float): confidence threshold;
- - visualization (bool): Whether to save the results as picture files; - - **NOTE:** choose one parameter to provide data from paths and images - - - **Return** - - - res (list\[dict\]): results - - data (list): detection results, each element in the list is dict - - confidence (float): the confidence of the result - - label (str): label - - left (int): the upper left corner x coordinate of the detection box - - top (int): the upper left corner y coordinate of the detection box - - right (int): the lower right corner x coordinate of the detection box - - bottom (int): the lower right corner y coordinate of the detection box - - save\_path (str, optional): output path for saving results - - - - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) - ``` - - Save model to specific path - - - **Parameters** - - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file - - -## IV.Server Deployment - -- PaddleHub Serving can deploy an online service of object detection. - -- ### Step 1: Start PaddleHub Serving - - - Run the startup command: - - ```shell - $ hub serving start -m faster_rcnn_resnet50_fpn_coco2017 - ``` - - - The servitization API is now deployed and the default port number is 8866. - - - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. - -- ### Step 2: Send a predictive request - - - With a configured server, use the following lines of code to send the prediction request and obtain the result - - - ```python - import requests - import json - import cv2 - import base64 - - - def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - # Send an HTTP request - data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} - headers = {"Content-type": "application/json"} - url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_fpn_coco2017" - r = requests.post(url=url, headers=headers, data=json.dumps(data)) - - # print prediction results - print(r.json()["results"]) - ``` - - -## V.Release Note - -* 1.0.0 - - First release - -* 1.0.1 - - Fix the problem of reading numpy - - ```shell - $ hub install ernie_tiny==1.1.0 - ``` From 4fb31a666c950719a805518ff335fff24b5cbe13 Mon Sep 17 00:00:00 2001 From: Yulv-git Date: Fri, 29 Apr 2022 12:36:26 +0800 Subject: [PATCH 005/117] Fix some typos in demo, paddlehub, etc. 
--- README_ch.md | 2 +- demo/audio_classification/audioset_predict.py | 2 +- demo/audio_classification/predict.py | 2 +- demo/text_classification/embedding/model.py | 2 +- paddlehub/compat/task/hook.py | 2 +- paddlehub/module/audio_module.py | 2 +- paddlehub/module/nlp_module.py | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README_ch.md b/README_ch.md index 7d6811b67..d33d8619e 100644 --- a/README_ch.md +++ b/README_ch.md @@ -273,7 +273,7 @@ print(results) * 非常感谢[BurrowsWang](https://github.com/BurrowsWang)修复Markdown表格显示问题 * 非常感谢[huqi](https://github.com/hu-qi)修复了readme中的错别字 * 非常感谢[parano](https://github.com/parano)、[cqvu](https://github.com/cqvu)、[deehrlic](https://github.com/deehrlic)三位的贡献与支持 -* 非常感谢[paopjian](https://github.com/paopjian)修改了中文readme模型搜索指向的的网站地址错误[#1424](https://github.com/PaddlePaddle/PaddleHub/issues/1424) +* 非常感谢[paopjian](https://github.com/paopjian)修改了中文readme模型搜索指向的网站地址错误[#1424](https://github.com/PaddlePaddle/PaddleHub/issues/1424) * 非常感谢[Wgm-Inspur](https://github.com/Wgm-Inspur)修复了readme中的代码示例问题,并优化了文本分类、序列标注demo中的RNN示例图 * 非常感谢[zl1271](https://github.com/zl1271)修复了serving文档中的错别字 * 非常感谢[AK391](https://github.com/AK391)在Hugging Face spaces中添加了UGATIT和deoldify模型的web demo diff --git a/demo/audio_classification/audioset_predict.py b/demo/audio_classification/audioset_predict.py index 300351a3e..cc9328203 100644 --- a/demo/audio_classification/audioset_predict.py +++ b/demo/audio_classification/audioset_predict.py @@ -25,7 +25,7 @@ parser = argparse.ArgumentParser(__doc__) parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.") parser.add_argument("--sr", type=int, default=32000, help="Sample rate of inference audio.") -parser.add_argument("--model_type", type=str, default='panns_cnn14', help="Select model to to inference.") +parser.add_argument("--model_type", type=str, default='panns_cnn14', help="Select model to inference.") parser.add_argument("--topk", type=int, default=10, help="Show top k results of audioset labels.") args = parser.parse_args() diff --git a/demo/audio_classification/predict.py b/demo/audio_classification/predict.py index 389a986fc..4bf07b850 100644 --- a/demo/audio_classification/predict.py +++ b/demo/audio_classification/predict.py @@ -24,7 +24,7 @@ parser = argparse.ArgumentParser(__doc__) parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.") parser.add_argument("--sr", type=int, default=44100, help="Sample rate of inference audio.") -parser.add_argument("--model_type", type=str, default='panns_cnn14', help="Select model to to inference.") +parser.add_argument("--model_type", type=str, default='panns_cnn14', help="Select model to inference.") parser.add_argument("--topk", type=int, default=1, help="Show top k results of prediction labels.") parser.add_argument( "--checkpoint", type=str, default='./checkpoint/best_model/model.pdparams', help="Checkpoint of model.") diff --git a/demo/text_classification/embedding/model.py b/demo/text_classification/embedding/model.py index f7e029c69..c0f2ab40e 100644 --- a/demo/text_classification/embedding/model.py +++ b/demo/text_classification/embedding/model.py @@ -136,7 +136,7 @@ def _batchify(self, data: List[List[str]], max_seq_len: int, batch_size: int): ids = pad_sequence(ids, max_seq_len, pad_token_id) examples.append(ids) - # Seperates data into some batches. + # Separates data into some batches. 
one_batch = [] for example in examples: one_batch.append(example) diff --git a/paddlehub/compat/task/hook.py b/paddlehub/compat/task/hook.py index 20606697a..444022dc1 100644 --- a/paddlehub/compat/task/hook.py +++ b/paddlehub/compat/task/hook.py @@ -109,7 +109,7 @@ def modify(self, hook_type: str, name: str, func: Callable): def exist(self, hook_type: str, name: str) -> bool: ''' - check if the the handler function of spectific event is existing. + check if the handler function of spectific event is existing. Args: hook_type (str): the spectific event name name (str): the handler function name diff --git a/paddlehub/module/audio_module.py b/paddlehub/module/audio_module.py index 28e8f1875..71f25bbd6 100644 --- a/paddlehub/module/audio_module.py +++ b/paddlehub/module/audio_module.py @@ -39,7 +39,7 @@ def _batchify(self, data: List[List[float]], sample_rate: int, feat_type: str, b else: examples.append(waveform) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/paddlehub/module/nlp_module.py b/paddlehub/module/nlp_module.py index 9e39d3467..967fce9cc 100644 --- a/paddlehub/module/nlp_module.py +++ b/paddlehub/module/nlp_module.py @@ -141,7 +141,7 @@ class PretrainedModel(nn.Layer): - `pretrained_init_configuration` (dict): The dict has pretrained model names as keys, and the values are also dict preserving corresponding configuration for model initialization. - - `base_model_prefix` (str): represents the the attribute associated to the + - `base_model_prefix` (str): represents the attribute associated to the base model in derived classes of the same architecture adding layers on top of the base model. """ @@ -477,7 +477,7 @@ def _parse_batch(batch): example.extend((input_ids, token_type_ids)) examples.append(example) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) From c68355efa897601de21e6c56c9d66060ba34b32b Mon Sep 17 00:00:00 2001 From: Yulv-git Date: Fri, 29 Apr 2022 12:36:46 +0800 Subject: [PATCH 006/117] Fix some typos in modules. 
--- modules/audio/tts/deepvoice3_ljspeech/module.py | 2 +- modules/audio/tts/fastspeech_ljspeech/module.py | 2 +- modules/audio/tts/transformer_tts_ljspeech/module.py | 2 +- modules/audio/voice_cloning/lstm_tacotron2/module.py | 2 +- .../colorization/user_guided_colorization/data_feed.py | 2 +- .../image/Image_editing/super_resolution/realsr/rrdb.py | 2 +- .../Image_gan/style_transfer/Photo2Cartoon/README.md | 2 +- .../Image_gan/style_transfer/U2Net_Portrait/README.md | 2 +- .../face_landmark_localization/processor.py | 2 +- modules/image/matting/modnet_resnet50vd_matting/resnet.py | 2 +- .../FCN_HRNet_W18_Face_Seg/README.md | 2 +- modules/text/lexical_analysis/jieba_paddle/module.py | 2 +- modules/text/lexical_analysis/lac/module.py | 8 ++++---- .../text/machine_translation/transformer/en-de/module.py | 2 +- .../text/machine_translation/transformer/zh-en/module.py | 2 +- modules/text/text_generation/plato-mini/module.py | 2 +- .../unified_transformer-12L-cn-luge/module.py | 2 +- .../text_generation/unified_transformer-12L-cn/module.py | 2 +- modules/video/Video_editing/SkyAR/rain.py | 2 +- .../fairmot_dla34/modeling/mot/tracker/jde_tracker.py | 2 +- .../jde_darknet53/modeling/mot/tracker/jde_tracker.py | 2 +- 21 files changed, 24 insertions(+), 24 deletions(-) diff --git a/modules/audio/tts/deepvoice3_ljspeech/module.py b/modules/audio/tts/deepvoice3_ljspeech/module.py index 580b85803..4c8ab2784 100644 --- a/modules/audio/tts/deepvoice3_ljspeech/module.py +++ b/modules/audio/tts/deepvoice3_ljspeech/module.py @@ -287,7 +287,7 @@ def run_cmd(self, argvs): self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_input_group = self.parser.add_argument_group( - title="Ouput options", description="Ouput path. Optional.") + title="Output options", description="Output path. Optional.") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, optional.") diff --git a/modules/audio/tts/fastspeech_ljspeech/module.py b/modules/audio/tts/fastspeech_ljspeech/module.py index f0b7a8d9f..48c21ced4 100644 --- a/modules/audio/tts/fastspeech_ljspeech/module.py +++ b/modules/audio/tts/fastspeech_ljspeech/module.py @@ -228,7 +228,7 @@ def run_cmd(self, argvs): self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_input_group = self.parser.add_argument_group( - title="Ouput options", description="Ouput path. Optional.") + title="Output options", description="Output path. Optional.") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, optional.") diff --git a/modules/audio/tts/transformer_tts_ljspeech/module.py b/modules/audio/tts/transformer_tts_ljspeech/module.py index ee8e68cde..25c09d821 100644 --- a/modules/audio/tts/transformer_tts_ljspeech/module.py +++ b/modules/audio/tts/transformer_tts_ljspeech/module.py @@ -245,7 +245,7 @@ def run_cmd(self, argvs): self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_input_group = self.parser.add_argument_group( - title="Ouput options", description="Ouput path. Optional.") + title="Output options", description="Output path. 
Optional.") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, optional.") diff --git a/modules/audio/voice_cloning/lstm_tacotron2/module.py b/modules/audio/voice_cloning/lstm_tacotron2/module.py index 8e60afa2b..51f12f7dd 100644 --- a/modules/audio/voice_cloning/lstm_tacotron2/module.py +++ b/modules/audio/voice_cloning/lstm_tacotron2/module.py @@ -157,7 +157,7 @@ def _parse_batch(batch_data): for text in data: examples.append(self._convert_text_to_input(text)) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/data_feed.py b/modules/image/Image_editing/colorization/user_guided_colorization/data_feed.py index 0c628eb18..21610d8d1 100644 --- a/modules/image/Image_editing/colorization/user_guided_colorization/data_feed.py +++ b/modules/image/Image_editing/colorization/user_guided_colorization/data_feed.py @@ -97,7 +97,7 @@ def __init__(self, def __call__(self, data_lab): """ - This method seperates the L channel and AB channel, obtain hint, mask and real_B_enc as the input for colorization task. + This method separates the L channel and AB channel, obtain hint, mask and real_B_enc as the input for colorization task. Args: img(np.ndarray|paddle.Tensor): LAB image. diff --git a/modules/image/Image_editing/super_resolution/realsr/rrdb.py b/modules/image/Image_editing/super_resolution/realsr/rrdb.py index 79e5de8aa..9a3f48913 100644 --- a/modules/image/Image_editing/super_resolution/realsr/rrdb.py +++ b/modules/image/Image_editing/super_resolution/realsr/rrdb.py @@ -36,7 +36,7 @@ def _do_register(self, name, obj): def register(self, obj=None, name=None): """ - Register the given object under the the name `obj.__name__`. + Register the given object under the name `obj.__name__`. Can be used as either a decorator or not. See docstring of this class for usage. """ if obj is None: diff --git a/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md index 160d9d3b3..412522e6a 100644 --- a/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md +++ b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md @@ -75,7 +75,7 @@ - paths (list\[str\]): 输入图像路径;
- output\_dir (str): 图片的保存路径,默认设为 output;
- batch_size (int) : batch大小;
- - visualization (bool) : 是否将结果保存为图片文件;;
+ - visualization (bool) : 是否将结果保存为图片文件;
- use_gpu (bool) : 是否使用 GPU 进行推理。 **NOTE:** paths和images两个参数选择其一进行提供数据 diff --git a/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md index c387e12f6..3a4494044 100644 --- a/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md +++ b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md @@ -83,7 +83,7 @@ - scale (float) : 缩放因子(与face_detection相关联);
- batch_size (int) : batch大小;
- output\_dir (str): 图片的保存路径,默认设为 output;
- - visualization (bool) : 是否将结果保存为图片文件;;
+ - visualization (bool) : 是否将结果保存为图片文件;
**NOTE:** paths和images两个参数选择其一进行提供数据 diff --git a/modules/image/keypoint_detection/face_landmark_localization/processor.py b/modules/image/keypoint_detection/face_landmark_localization/processor.py index 2ac325479..d0ffff40b 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/processor.py +++ b/modules/image/keypoint_detection/face_landmark_localization/processor.py @@ -37,7 +37,7 @@ def get_image_ext(image): def postprocess(res, output_dir, visualization): """ - postprocess ouput of network, one face at a time. + postprocess output of network, one face at a time. """ output = [] _cur_id = -1 diff --git a/modules/image/matting/modnet_resnet50vd_matting/resnet.py b/modules/image/matting/modnet_resnet50vd_matting/resnet.py index 19abe41c8..f4d69acd1 100644 --- a/modules/image/matting/modnet_resnet50vd_matting/resnet.py +++ b/modules/image/matting/modnet_resnet50vd_matting/resnet.py @@ -268,7 +268,7 @@ def __init__(self, block] if dilation_dict and block in dilation_dict else 1 # Actually block here is 'stage', and i is 'block' in 'stage' - # At the stage 4, expand the the dilation_rate if given multi_grid + # At the stage 4, expand the dilation_rate if given multi_grid if block == 3: dilation_rate = dilation_rate * multi_grid[i] ############################################################################### diff --git a/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md index a892e978f..9399687e9 100644 --- a/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md +++ b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md @@ -73,7 +73,7 @@ - paths (list\[str\]): 输入图像路径;
- batch_size (int) : batch大小;
- output\_dir (str): 图片的保存路径,默认设为 output;
- - visualization (bool) : 是否将结果保存为图片文件;;
+ - visualization (bool) : 是否将结果保存为图片文件;
**NOTE:** paths和images两个参数选择其一进行提供数据 diff --git a/modules/text/lexical_analysis/jieba_paddle/module.py b/modules/text/lexical_analysis/jieba_paddle/module.py index c4ed5844f..6fc6bdc48 100644 --- a/modules/text/lexical_analysis/jieba_paddle/module.py +++ b/modules/text/lexical_analysis/jieba_paddle/module.py @@ -61,7 +61,7 @@ def check_dependency(self): import jieba except ImportError: print( - 'This module requires jieba tools. The running enviroment does not meet the requirments. Please install jieba packages.' + 'This module requires jieba tools. The running environment does not meet the requirments. Please install jieba packages.' ) exit() diff --git a/modules/text/lexical_analysis/lac/module.py b/modules/text/lexical_analysis/lac/module.py index 40136fe63..bfd859374 100644 --- a/modules/text/lexical_analysis/lac/module.py +++ b/modules/text/lexical_analysis/lac/module.py @@ -190,10 +190,10 @@ def if_exist(var): def set_user_dict(self, dict_path, sep=None): """ - Set the costomized dictionary if you wanna exploit the self-defined dictionary + Set the customized dictionary if you wanna exploit the self-defined dictionary Args: - dict_path(str): The directory to the costomized dictionary. + dict_path(str): The directory to the customized dictionary. sep: The seperation token in phases. Default as ' ' or '\t'. """ if not os.path.exists(dict_path): @@ -203,7 +203,7 @@ def set_user_dict(self, dict_path, sep=None): def del_user_dict(self): """ - Delete the costomized dictionary if you don't wanna exploit the self-defined dictionary any longer + Delete the customized dictionary if you don't wanna exploit the self-defined dictionary any longer """ if self.custom: @@ -508,7 +508,7 @@ def check_input_data(self, args): print(result['word']) print(result['tag']) - # delete the costomized dictionary + # delete the customized dictionary lac.del_user_dict() results = lac.cut(text="春天的花开秋天的风以及冬天的落阳", use_gpu=False, batch_size=1, return_tag=False) diff --git a/modules/text/machine_translation/transformer/en-de/module.py b/modules/text/machine_translation/transformer/en-de/module.py index 75b0389b8..e59f5e1f4 100644 --- a/modules/text/machine_translation/transformer/en-de/module.py +++ b/modules/text/machine_translation/transformer/en-de/module.py @@ -139,7 +139,7 @@ def _parse_batch(batch_ids): for text in data: examples.append(self._convert_text_to_input(text)) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/text/machine_translation/transformer/zh-en/module.py b/modules/text/machine_translation/transformer/zh-en/module.py index 318d57284..fc49959ff 100644 --- a/modules/text/machine_translation/transformer/zh-en/module.py +++ b/modules/text/machine_translation/transformer/zh-en/module.py @@ -147,7 +147,7 @@ def _parse_batch(batch_ids): for text in data: examples.append(self._convert_text_to_input(text)) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/text/text_generation/plato-mini/module.py b/modules/text/text_generation/plato-mini/module.py index b6ba1be9e..63c07cbed 100644 --- a/modules/text/text_generation/plato-mini/module.py +++ b/modules/text/text_generation/plato-mini/module.py @@ -87,7 +87,7 @@ def _parse_batch(batch_examples): for texts in data: examples.append(self._convert_text_to_input(texts, max_seq_len)) - # Seperates data into some batches. 
+ # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/text/text_generation/unified_transformer-12L-cn-luge/module.py b/modules/text/text_generation/unified_transformer-12L-cn-luge/module.py index 115b1e0e9..45e6ca27b 100644 --- a/modules/text/text_generation/unified_transformer-12L-cn-luge/module.py +++ b/modules/text/text_generation/unified_transformer-12L-cn-luge/module.py @@ -87,7 +87,7 @@ def _parse_batch(batch_examples): for texts in data: examples.append(self._convert_text_to_input(texts, max_seq_len)) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/text/text_generation/unified_transformer-12L-cn/module.py b/modules/text/text_generation/unified_transformer-12L-cn/module.py index 363d15d70..e875c38ae 100644 --- a/modules/text/text_generation/unified_transformer-12L-cn/module.py +++ b/modules/text/text_generation/unified_transformer-12L-cn/module.py @@ -87,7 +87,7 @@ def _parse_batch(batch_examples): for texts in data: examples.append(self._convert_text_to_input(texts, max_seq_len)) - # Seperates data into some batches. + # Separates data into some batches. one_batch = [] for example in examples: one_batch.append(example) diff --git a/modules/video/Video_editing/SkyAR/rain.py b/modules/video/Video_editing/SkyAR/rain.py index f6cfd224e..83d8d772d 100644 --- a/modules/video/Video_editing/SkyAR/rain.py +++ b/modules/video/Video_editing/SkyAR/rain.py @@ -18,7 +18,7 @@ def _get_rain_layer(self): ret, frame = self.cap.read() if ret: rain_layer = frame - else: # if reach the last frame, read from the begining + else: # if reach the last frame, read from the beginning self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0) ret, frame = self.cap.read() rain_layer = frame diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py index 2e1cafb34..f71a59979 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py @@ -97,7 +97,7 @@ def update(self, pred_dets, pred_embs): Return: output_stracks (list): The list contains information regarding the - online_tracklets for the recieved image tensor. + online_tracklets for the received image tensor. """ self.frame_id += 1 activated_starcks = [] diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py index 2e1cafb34..f71a59979 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py @@ -97,7 +97,7 @@ def update(self, pred_dets, pred_embs): Return: output_stracks (list): The list contains information regarding the - online_tracklets for the recieved image tensor. + online_tracklets for the received image tensor. 
""" self.frame_id += 1 activated_starcks = [] From 9b3119dfb63c4cbb7acfb9f1f1c09ac24e6d68d2 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 10 May 2022 11:43:50 +0000 Subject: [PATCH 007/117] add module --- .../text_recognition/ppocrv3_det_ch/module.py | 305 ++++++++++++ .../ppocrv3_det_ch/processor.py | 270 +++++++++++ .../ppocrv3_rec_ch/character.py | 228 +++++++++ .../text_recognition/ppocrv3_rec_ch/module.py | 458 ++++++++++++++++++ .../text_recognition/ppocrv3_rec_ch/utils.py | 179 +++++++ 5 files changed, 1440 insertions(+) create mode 100644 modules/image/text_recognition/ppocrv3_det_ch/module.py create mode 100644 modules/image/text_recognition/ppocrv3_det_ch/processor.py create mode 100644 modules/image/text_recognition/ppocrv3_rec_ch/character.py create mode 100644 modules/image/text_recognition/ppocrv3_rec_ch/module.py create mode 100644 modules/image/text_recognition/ppocrv3_rec_ch/utils.py diff --git a/modules/image/text_recognition/ppocrv3_det_ch/module.py b/modules/image/text_recognition/ppocrv3_det_ch/module.py new file mode 100644 index 000000000..675feb202 --- /dev/null +++ b/modules/image/text_recognition/ppocrv3_det_ch/module.py @@ -0,0 +1,305 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import ast +import base64 +import math +import os +import time + +import cv2 +import numpy as np +import paddle.fluid as fluid +import paddle.inference as paddle_infer +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import create_paddle_predictor +from paddle.fluid.core import PaddleTensor +from PIL import Image + +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name="ppocrv3_det_ch", + version="1.0.0", + summary= + "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseTextDetectionDB(hub.Module): + + def _initialize(self, enable_mkldnn=False): + """ + initialize with the necessary elements + """ + self.pretrained_model_path = os.path.join(self.directory, 'inference_model', 'ppocrv3_det') + self.enable_mkldnn = enable_mkldnn + + self._set_config() + + def check_requirements(self): + try: + import shapely, pyclipper + except: + raise ImportError( + 'This module requires the shapely, pyclipper tools. The running environment does not meet the requirements. Please install the two packages.' 
+ ) + + def _set_config(self): + """ + predictor config setting + """ + model_file_path = self.pretrained_model_path + '.pdmodel' + params_file_path = self.pretrained_model_path + '.pdiparams' + + config = paddle_infer.Config(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + config.set_cpu_math_library_num_threads(6) + if self.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + + config.disable_glog_info() + + # use zero copy + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + self.predictor = paddle_infer.create_predictor(config) + input_names = self.predictor.get_input_names() + self.input_tensor = self.predictor.get_input_handle(input_names[0]) + output_names = self.predictor.get_output_names() + self.output_tensors = [] + for output_name in output_names: + output_tensor = self.predictor.get_output_handle(output_name) + self.output_tensors.append(output_tensor) + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def order_points_clockwise(self, pts): + rect = np.zeros((4, 2), dtype="float32") + s = pts.sum(axis=1) + rect[0] = pts[np.argmin(s)] + rect[2] = pts[np.argmax(s)] + diff = np.diff(pts, axis=1) + rect[1] = pts[np.argmin(diff)] + rect[3] = pts[np.argmax(diff)] + return rect + + def clip_det_res(self, points, img_height, img_width): + for pno in range(points.shape[0]): + points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) + points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) + return points + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + box = self.clip_det_res(box, img_height, img_width) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 3 or rect_height <= 3: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.clip_det_res(box, img_height, img_width) + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def detect_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='detection_result', + visualization=False, + box_thresh=0.5): + """ + Get the text box in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + use_gpu (bool): Whether to use gpu. Default false. + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + box_thresh(float): the threshold of the detected text box's confidence + Returns: + res (list): The result of text detection box and save path of images. 
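+ Example (an illustrative sketch, assuming PaddleHub and this module are installed; '/PATH/TO/IMAGE' is a placeholder path):
+     import cv2
+     import paddlehub as hub
+     detector = hub.Module(name="ppocrv3_det_ch")  # renamed to ch_pp-ocrv3_det in a later patch
+     results = detector.detect_text(images=[cv2.imread('/PATH/TO/IMAGE')], visualization=True, output_dir='detection_result')
+     # each item in the returned list carries 'data' (detected quadrilaterals) and 'save_path'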
+ """ + self.check_requirements() + + from .processor import DBProcessTest, DBPostProcess, draw_boxes, get_image_ext + + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + + preprocessor = DBProcessTest(params={'max_side_len': 960}) + postprocessor = DBPostProcess(params={ + 'thresh': 0.3, + 'box_thresh': 0.6, + 'max_candidates': 1000, + 'unclip_ratio': 1.5 + }) + + all_imgs = [] + all_ratios = [] + all_results = [] + for original_image in predicted_data: + ori_im = original_image.copy() + im, ratio_list = preprocessor(original_image) + print('after preprocess int det, shape{}'.format(im.shape)) + res = {'save_path': ''} + if im is None: + res['data'] = [] + + else: + im = im.copy() + self.input_tensor.copy_from_cpu(im) + self.predictor.run() + + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + + outs_dict = {} + outs_dict['maps'] = outputs[0] + + # data_out = self.output_tensors[0].copy_to_cpu() + print('Outputs[0] in det, shape: {}'.format(outputs[0].shape)) + dt_boxes_list = postprocessor(outs_dict, [ratio_list]) + dt_boxes = dt_boxes_list[0] + print('after postprocess int det, shape{}'.format(dt_boxes.shape)) + boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) + print('after fitler tag int det, shape{}'.format(boxes.shape)) + res['data'] = boxes.astype(np.int).tolist() + print('boxes: {}'.format(boxes)) + all_imgs.append(im) + all_ratios.append(ratio_list) + if visualization: + img = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + draw_img = draw_boxes(img, boxes) + draw_img = np.array(draw_img) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + cv2.imwrite(os.path.join(output_dir, saved_name), draw_img[:, :, ::-1]) + res['save_path'] = os.path.join(output_dir, saved_name) + + all_results.append(res) + + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.detect_text(images=images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.detect_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") diff --git a/modules/image/text_recognition/ppocrv3_det_ch/processor.py b/modules/image/text_recognition/ppocrv3_det_ch/processor.py new file mode 100644 index 000000000..a93aecef0 --- /dev/null +++ b/modules/image/text_recognition/ppocrv3_det_ch/processor.py @@ -0,0 +1,270 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +import cv2 +import numpy as np +import paddle +import pyclipper +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont +from shapely.geometry import Polygon + + +class DBProcessTest(object): + """ + DB pre-process for Test mode + """ + + def __init__(self, params): + super(DBProcessTest, self).__init__() + self.resize_type = 0 + if 'test_image_shape' in params: + self.image_shape = params['test_image_shape'] + # print(self.image_shape) + self.resize_type = 1 + if 'max_side_len' in params: + self.max_side_len = params['max_side_len'] + else: + self.max_side_len = 2400 + + def resize_image_type0(self, img): + """ + resize image to a size multiple of 32 which is required by the network + args: + img(array): array with shape [h, w, c] + return(tuple): + img, (ratio_h, ratio_w) + """ + limit_side_len = self.max_side_len + h, w, _ = img.shape + + # limit the max side + if max(h, w) > limit_side_len: + if h > w: + ratio = float(limit_side_len) / h + else: + ratio = float(limit_side_len) / w + else: + ratio = 1. 
+ resize_h = int(h * ratio) + resize_w = int(w * ratio) + + resize_h = int(round(resize_h / 32) * 32) + resize_w = int(round(resize_w / 32) * 32) + + try: + if int(resize_w) <= 0 or int(resize_h) <= 0: + return None, (None, None) + img = cv2.resize(img, (int(resize_w), int(resize_h))) + except: + print(img.shape, resize_w, resize_h) + sys.exit(0) + ratio_h = resize_h / float(h) + ratio_w = resize_w / float(w) + # return img, np.array([h, w]) + return img, [ratio_h, ratio_w] + + def resize_image_type1(self, im): + resize_h, resize_w = self.image_shape + ori_h, ori_w = im.shape[:2] # (h, w, c) + im = cv2.resize(im, (int(resize_w), int(resize_h))) + ratio_h = float(resize_h) / ori_h + ratio_w = float(resize_w) / ori_w + return im, (ratio_h, ratio_w) + + def normalize(self, im): + img_mean = [0.485, 0.456, 0.406] + img_std = [0.229, 0.224, 0.225] + im = im.astype(np.float32, copy=False) + im = im / 255 + im[:, :, 0] -= img_mean[0] + im[:, :, 1] -= img_mean[1] + im[:, :, 2] -= img_mean[2] + im[:, :, 0] /= img_std[0] + im[:, :, 1] /= img_std[1] + im[:, :, 2] /= img_std[2] + channel_swap = (2, 0, 1) + im = im.transpose(channel_swap) + return im + + def __call__(self, im): + if self.resize_type == 0: + im, (ratio_h, ratio_w) = self.resize_image_type0(im) + else: + im, (ratio_h, ratio_w) = self.resize_image_type1(im) + im = self.normalize(im) + im = im[np.newaxis, :] + return [im, (ratio_h, ratio_w)] + + +class DBPostProcess(object): + """ + The post process for Differentiable Binarization (DB). + """ + + def __init__(self, params): + self.thresh = params['thresh'] + self.box_thresh = params['box_thresh'] + self.max_candidates = params['max_candidates'] + self.unclip_ratio = params['unclip_ratio'] + self.min_size = 3 + self.dilation_kernel = None + self.score_mode = 'fast' + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + if len(outs) == 3: + img, contours, _ = outs[0], outs[1], outs[2] + elif len(outs) == 2: + contours, _ = outs[0], outs[1] + + num_contours = min(len(contours), self.max_candidates) + + boxes = [] + scores = [] + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + if self.score_mode == "fast": + score = self.box_score_fast(pred, points.reshape(-1, 2)) + else: + score = self.box_score_slow(pred, contour) + if self.box_thresh > score: + continue + + box = self.unclip(points).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + + box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes.append(box.astype(np.int16)) + scores.append(score) + return np.array(boxes, dtype=np.int16), scores + + def unclip(self, box): + unclip_ratio = self.unclip_ratio + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [points[index_1], points[index_2], points[index_3], points[index_4]] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + ''' + box_score_fast: use bbox mean score as the mean score + ''' + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def box_score_slow(self, bitmap, contour): + ''' + box_score_slow: use polyon mean score as the mean score + ''' + h, w = bitmap.shape[:2] + contour = contour.copy() + contour = np.reshape(contour, (-1, 2)) + + xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) + xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) + ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) + ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + + contour[:, 0] = contour[:, 0] - xmin + contour[:, 1] = contour[:, 1] - ymin + + cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, ratio_list): + pred = outs_dict['maps'] + + pred = pred[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + height, width = pred.shape[-2:] + + mask = segmentation[batch_index] + tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height) + + boxes_batch.append(tmp_boxes) + return boxes_batch + + +def draw_boxes(image, boxes, scores=None, drop_score=0.5): + img = image.copy() + draw = ImageDraw.Draw(img) + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score: + continue + draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') + draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') + draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') + draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') + draw.line([(box[0][0] - 1, box[0][1] + 1), (box[1][0] - 1, box[1][1] + 1)], fill='red') + draw.line([(box[1][0] - 1, box[1][1] + 1), (box[2][0] - 1, box[2][1] + 1)], fill='red') + draw.line([(box[2][0] - 1, box[2][1] + 1), (box[3][0] - 1, box[3][1] + 1)], fill='red') + draw.line([(box[3][0] - 1, box[3][1] + 1), (box[0][0] - 1, box[0][1] + 1)], fill='red') + return img + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/character.py b/modules/image/text_recognition/ppocrv3_rec_ch/character.py new file mode 100644 index 000000000..3d40d7183 --- /dev/null +++ b/modules/image/text_recognition/ppocrv3_rec_ch/character.py @@ -0,0 +1,228 @@ +# Copyright (c) 2020 PaddlePaddle 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import string + +import numpy as np + + +class CharacterOps(object): + """ Convert between text-label and text-index + Args: + config: config from yaml file + """ + + def __init__(self, config): + self.character_type = config['character_type'] + self.max_text_len = config['max_text_length'] + if self.character_type == "en": + self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" + dict_character = list(self.character_str) + # use the custom dictionary + elif self.character_type == "ch": + character_dict_path = config['character_dict_path'] + add_space = False + if 'use_space_char' in config: + add_space = config['use_space_char'] + self.character_str = [] + with open(character_dict_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + line = line.decode('utf-8').strip("\n").strip("\r\n") + self.character_str.append(line) + if add_space: + self.character_str.append(" ") + dict_character = list(self.character_str) + elif self.character_type == "en_sensitive": + # same with ASTER setting (use 94 char). + self.character_str = string.printable[:-6] + dict_character = list(self.character_str) + else: + self.character_str = None + self.beg_str = "sos" + self.end_str = "eos" + + dict_character = self.add_special_char(dict_character) + self.dict = {} + for i, char in enumerate(dict_character): + self.dict[char] = i + self.character = dict_character + + def add_special_char(self, dict_character): + return dict_character + + def encode(self, text): + """convert text-label into text-index. + input: + text: text labels of each image. [batch_size] + + output: + text: concatenated text index for CTCLoss. + [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] + length: length of each text. [batch_size] + """ + if self.character_type == "en": + text = text.lower() + + text_list = [] + for char in text: + if char not in self.dict: + continue + text_list.append(self.dict[char]) + text = np.array(text_list) + return text + + def decode(self, text_index, text_prob=None, is_remove_duplicate=False): + """ convert text-index into text-label. 
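+ Illustrative example: with the CTC blank at index 0 and is_remove_duplicate=True,
+ an index row [5, 5, 0, 5, 3] keeps only positions differing from their left
+ neighbour -> [5, 0, 5, 3], then drops the ignored blank token -> [5, 5, 3],
+ and those indices are mapped through self.character and joined into the text.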
""" + result_list = [] + ignored_tokens = self.get_ignored_tokens() + batch_size = len(text_index) + for batch_idx in range(batch_size): + selection = np.ones(len(text_index[batch_idx]), dtype=bool) + if is_remove_duplicate: + selection[1:] = text_index[batch_idx][1:] != text_index[batch_idx][:-1] + for ignored_token in ignored_tokens: + selection &= text_index[batch_idx] != ignored_token + # print(text_index) + # print(batch_idx) + # print(selection) + # for text_id in text_index[batch_idx][selection]: + # print(text_id) + # print(self.character[text_id]) + char_list = [self.character[text_id] for text_id in text_index[batch_idx][selection]] + if text_prob is not None: + conf_list = text_prob[batch_idx][selection] + else: + conf_list = [1] * len(selection) + if len(conf_list) == 0: + conf_list = [0] + + text = ''.join(char_list) + result_list.append((text, np.mean(conf_list).tolist())) + return result_list + + def get_char_num(self): + return len(self.character) + + def get_beg_end_flag_idx(self, beg_or_end): + if self.loss_type == "attention": + if beg_or_end == "beg": + idx = np.array(self.dict[self.beg_str]) + elif beg_or_end == "end": + idx = np.array(self.dict[self.end_str]) + else: + assert False, "Unsupport type %s in get_beg_end_flag_idx"\ + % beg_or_end + return idx + else: + err = "error in get_beg_end_flag_idx when using the loss %s"\ + % (self.loss_type) + assert False, err + + def get_ignored_tokens(self): + return [0] # for ctc blank + + +def cal_predicts_accuracy(char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate=False): + """ + Calculate prediction accuracy + Args: + char_ops: CharacterOps + preds: preds result,text index + preds_lod: lod tensor of preds + labels: label of input image, text index + labels_lod: lod tensor of label + is_remove_duplicate: Whether to remove duplicate characters, + The default is False + Return: + acc: The accuracy of test set + acc_num: The correct number of samples predicted + img_num: The total sample number of the test set + """ + acc_num = 0 + img_num = 0 + for ino in range(len(labels_lod) - 1): + beg_no = preds_lod[ino] + end_no = preds_lod[ino + 1] + preds_text = preds[beg_no:end_no].reshape(-1) + preds_text = char_ops.decode(preds_text, is_remove_duplicate) + + beg_no = labels_lod[ino] + end_no = labels_lod[ino + 1] + labels_text = labels[beg_no:end_no].reshape(-1) + labels_text = char_ops.decode(labels_text, is_remove_duplicate) + img_num += 1 + + if preds_text == labels_text: + acc_num += 1 + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + +def cal_predicts_accuracy_srn(char_ops, preds, labels, max_text_len, is_debug=False): + acc_num = 0 + img_num = 0 + + char_num = char_ops.get_char_num() + + total_len = preds.shape[0] + img_num = int(total_len / max_text_len) + for i in range(img_num): + cur_label = [] + cur_pred = [] + for j in range(max_text_len): + if labels[j + i * max_text_len] != int(char_num - 1): #0 + cur_label.append(labels[j + i * max_text_len][0]) + else: + break + + for j in range(max_text_len + 1): + if j < len(cur_label) and preds[j + i * max_text_len][0] != cur_label[j]: + break + elif j == len(cur_label) and j == max_text_len: + acc_num += 1 + break + elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num - 1): + acc_num += 1 + break + acc = acc_num * 1.0 / img_num + return acc, acc_num, img_num + + +def convert_rec_attention_infer_res(preds): + img_num = preds.shape[0] + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + end_pos = 
np.where(preds[ino, :] == 1)[0] + if len(end_pos) <= 1: + text_list = preds[ino, 1:] + else: + text_list = preds[ino, 1:end_pos[1]] + target_lod.append(target_lod[ino] + len(text_list)) + convert_ids = convert_ids + list(text_list) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod + + +def convert_rec_label_to_lod(ori_labels): + img_num = len(ori_labels) + target_lod = [0] + convert_ids = [] + for ino in range(img_num): + target_lod.append(target_lod[ino] + len(ori_labels[ino])) + convert_ids = convert_ids + list(ori_labels[ino]) + convert_ids = np.array(convert_ids) + convert_ids = convert_ids.reshape((-1, 1)) + return convert_ids, target_lod diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/module.py b/modules/image/text_recognition/ppocrv3_rec_ch/module.py new file mode 100644 index 000000000..5df255fcf --- /dev/null +++ b/modules/image/text_recognition/ppocrv3_rec_ch/module.py @@ -0,0 +1,458 @@ +# -*- coding:utf-8 -*- +import argparse +import ast +import copy +import math +import os +import time + +import cv2 +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.inference as paddle_infer +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import create_paddle_predictor +from paddle.fluid.core import PaddleTensor +from PIL import Image + +import paddlehub as hub +from .character import CharacterOps +from .utils import base64_to_cv2 +from .utils import draw_ocr +from .utils import get_image_ext +from .utils import sorted_boxes +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo( + name="ppocrv3_rec_ch", + version="1.0.0", + summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \ + based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. 
", + author="paddle-dev", + author_email="paddle-dev@baidu.com", + type="cv/text_recognition") +class ChineseOCRDBCRNN(hub.Module): + + def _initialize(self, text_detector_module=None, enable_mkldnn=False): + """ + initialize with the necessary elements + """ + self.character_dict_path = os.path.join(self.directory, 'assets', 'ppocr_keys_v1.txt') + char_ops_params = { + 'character_type': 'ch', + 'character_dict_path': self.character_dict_path, + 'loss_type': 'ctc', + 'max_text_length': 25, + 'use_space_char': True + } + self.char_ops = CharacterOps(char_ops_params) + self.rec_image_shape = [3, 32, 320] + self._text_detector_module = text_detector_module + self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') + self.enable_mkldnn = enable_mkldnn + + self.rec_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'ppocrv3_rec') + self.cls_pretrained_model_path = os.path.join(self.directory, 'inference_model', 'ppocr_cls') + self.rec_predictor, self.rec_input_tensor, self.rec_output_tensors = self._set_config( + self.rec_pretrained_model_path) + self.cls_predictor, self.cls_input_tensor, self.cls_output_tensors = self._set_config( + self.cls_pretrained_model_path) + + def _set_config(self, pretrained_model_path): + """ + predictor config path + """ + model_file_path = pretrained_model_path + '.pdmodel' + params_file_path = pretrained_model_path + '.pdiparams' + + config = paddle_infer.Config(model_file_path, params_file_path) + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + + if use_gpu: + config.enable_use_gpu(8000, 0) + else: + config.disable_gpu() + if self.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + + config.disable_glog_info() + config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.switch_use_feed_fetch_ops(False) + + predictor = paddle_infer.create_predictor(config) + + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + output_names = predictor.get_output_names() + output_handles = [] + for output_name in output_names: + output_handle = predictor.get_output_handle(output_name) + output_handles.append(output_handle) + + return predictor, input_handle, output_handles + + @property + def text_detector_module(self): + """ + text detect module + """ + if not self._text_detector_module: + self._text_detector_module = hub.Module(name='ppocrv3_det_ch', + enable_mkldnn=self.enable_mkldnn, + version='1.0.0') + return self._text_detector_module + + def read_images(self, paths=[]): + images = [] + for img_path in paths: + assert os.path.isfile(img_path), "The {} isn't a valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + logger.info("error in loading image:{}".format(img_path)) + continue + images.append(img) + return images + + def get_rotate_crop_image(self, img, points): + ''' + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + ''' + img_crop_width = int(max(np.linalg.norm(points[0] - points[1]), np.linalg.norm(points[2] - points[3]))) + img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))) + pts_std 
= np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective(img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = np.rot90(dst_img) + return dst_img + + def resize_norm_img_rec(self, img, max_wh_ratio): + imgC, imgH, imgW = self.rec_image_shape + assert imgC == img.shape[2] + imgW = int((32 * max_wh_ratio)) + h, w = img.shape[:2] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def resize_norm_img_cls(self, img): + cls_image_shape = [3, 48, 192] + imgC, imgH, imgW = cls_image_shape + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + if cls_image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def recognize_text(self, + images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5, + angle_classification_thresh=0.9): + """ + Get the chinese texts in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + use_gpu (bool): Whether to use gpu. + batch_size(int): the program deals once with one + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + box_thresh(float): the threshold of the detected text box's confidence + text_thresh(float): the threshold of the chinese text recognition confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + + Returns: + res (list): The result of chinese texts and save path of images. + """ + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." + ) + + self.use_gpu = use_gpu + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = self.read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
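+ # Overview of the steps below: the detector returns quadrilateral text boxes; each box is
+ # cropped and perspective-rectified; the angle classifier flips crops predicted as '180'
+ # with confidence above angle_classification_thresh; the recognizer then CTC-decodes each
+ # crop and only results with confidence >= text_thresh are kept.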
+ + detection_results = self.text_detector_module.detect_text(images=predicted_data, + use_gpu=self.use_gpu, + box_thresh=box_thresh) + + boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] + print("dt_boxes num : {}".format(len(boxes[0]))) + all_results = [] + for index, img_boxes in enumerate(boxes): + original_image = predicted_data[index].copy() + result = {'save_path': ''} + if img_boxes.size == 0: + result['data'] = [] + else: + img_crop_list = [] + boxes = sorted_boxes(img_boxes) + for num_box in range(len(boxes)): + tmp_box = copy.deepcopy(boxes[num_box]) + img_crop = self.get_rotate_crop_image(original_image, tmp_box) + img_crop_list.append(img_crop) + print('img_crop shape {}'.format(img_crop.shape)) + img_crop_list, angle_list = self._classify_text(img_crop_list, + angle_classification_thresh=angle_classification_thresh) + rec_results = self._recognize_text(img_crop_list) + + # if the recognized text confidence score is lower than text_thresh, then drop it + rec_res_final = [] + for index, res in enumerate(rec_results): + text, score = res + if score >= text_thresh: + rec_res_final.append({ + 'text': text, + 'confidence': float(score), + 'text_box_position': boxes[index].astype(np.int).tolist() + }) + result['data'] = rec_res_final + + if visualization and result['data']: + result['save_path'] = self.save_result_image(original_image, boxes, rec_results, output_dir, + text_thresh) + all_results.append(result) + + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + def save_result_image( + self, + original_image, + detection_boxes, + rec_results, + output_dir='ocr_result', + text_thresh=0.5, + ): + image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) + txts = [item[0] for item in rec_results] + scores = [item[1] for item in rec_results] + draw_img = draw_ocr(image, + detection_boxes, + txts, + scores, + font_file=self.font_file, + draw_txt=True, + drop_score=text_thresh) + + if not os.path.exists(output_dir): + os.makedirs(output_dir) + ext = get_image_ext(original_image) + saved_name = 'ndarray_{}{}'.format(time.time(), ext) + save_file_path = os.path.join(output_dir, saved_name) + cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) + return save_file_path + + def _classify_text(self, image_list, angle_classification_thresh=0.9): + img_list = copy.deepcopy(image_list) + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the cls process + indices = np.argsort(np.array(width_list)) + + cls_res = [['', 0.0]] * img_num + batch_num = 6 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img_cls(img_list[indices[ino]]) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + + self.cls_input_tensor.copy_from_cpu(norm_img_batch) + self.cls_predictor.run() + + prob_out = 
self.cls_output_tensors[0].copy_to_cpu() + ## post process + label_list = ['0', '180'] + pred_idxs = prob_out.argmax(axis=1) + cls_result = [(label_list[idx], prob_out[i, idx]) for i, idx in enumerate(pred_idxs)] + for rno in range(len(cls_result)): + label, score = cls_result[rno] + cls_res[indices[beg_img_no + rno]] = [label, score] + if '180' in label and score > angle_classification_thresh: + img_list[indices[beg_img_no + rno]] = cv2.rotate(img_list[indices[beg_img_no + rno]], 1) + return img_list, cls_res + + def _recognize_text(self, img_list): + img_num = len(img_list) + # Calculate the aspect ratio of all text bars + width_list = [] + for img in img_list: + width_list.append(img.shape[1] / float(img.shape[0])) + # Sorting can speed up the recognition process + indices = np.argsort(np.array(width_list)) + + rec_res = [['', 0.0]] * img_num + batch_num = 6 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + max_wh_ratio = 0 + for ino in range(beg_img_no, end_img_no): + h, w = img_list[indices[ino]].shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + for ino in range(beg_img_no, end_img_no): + norm_img = self.resize_norm_img_rec(img_list[indices[ino]], max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + + norm_img_batch = np.concatenate(norm_img_batch, axis=0) + norm_img_batch = norm_img_batch.copy() + + self.rec_input_tensor.copy_from_cpu(norm_img_batch) + self.rec_predictor.run() + + ## + outputs = [] + for output_tensor in self.rec_output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + if len(outputs) != 1: + preds = outputs + else: + preds = outputs[0] + if isinstance(preds, tuple) or isinstance(preds, list): + preds = preds[-1] + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + print('preds.shape: {}', preds.shape) + preds_idx = preds.argmax(axis=2) + preds_prob = preds.max(axis=2) + # print('preds_idx: {} \n preds_prob: {}'.format(preds_idx, preds_prob) ) + rec_result = self.char_ops.decode(preds_idx, preds_prob, is_remove_duplicate=True) + for rno in range(len(rec_result)): + rec_res[indices[beg_img_no + rno]] = rec_result[rno] + + return rec_res + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + + self.add_module_config_arg() + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + results = self.recognize_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) + return results + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='ocr_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + + def add_module_input_arg(self): + """ + Add the command input options + """ + self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/utils.py b/modules/image/text_recognition/ppocrv3_rec_ch/utils.py new file mode 100644 index 000000000..18aa6ceec --- /dev/null +++ b/modules/image/text_recognition/ppocrv3_rec_ch/utils.py @@ -0,0 +1,179 @@ +# -*- coding:utf-8 -*- +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 +import math + +import cv2 +import numpy as np +from PIL import Image +from PIL import ImageDraw +from PIL import ImageFont + + +def draw_ocr(image, boxes, txts, scores, font_file, draw_txt=True, drop_score=0.5): + """ + Visualize the results of OCR detection and recognition + args: + image(Image|array): RGB image + boxes(list): boxes with shape(N, 4, 2) + txts(list): the texts + scores(list): txxs corresponding scores + draw_txt(bool): whether draw text or not + drop_score(float): only scores greater than drop_threshold will be visualized + return(array): + the visualized img + """ + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score or math.isnan(score): + continue + box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) + image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) + + if draw_txt: + img = np.array(resize_img(image, input_size=600)) + txt_img = text_visual(txts, scores, font_file, img_h=img.shape[0], img_w=600, threshold=drop_score) + img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) + return img + return image + + +def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): + """ + create new blank img and draw txt on it + args: + texts(list): the text will be draw + scores(list|None): corresponding score of each txt + img_h(int): the height of blank img + img_w(int): the width of blank img + return(array): + """ + if scores is not None: + assert len(texts) == len(scores), "The number of txts and corresponding scores must match" + + def create_blank_img(): + blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 + blank_img[:, img_w - 1:] = 0 + blank_img = Image.fromarray(blank_img).convert("RGB") + draw_txt = ImageDraw.Draw(blank_img) + return blank_img, draw_txt + + blank_img, draw_txt = create_blank_img() + + font_size = 20 + txt_color = (0, 0, 0) + font = ImageFont.truetype(font_file, font_size, encoding="utf-8") + + gap = font_size + 5 + txt_img_list 
= [] + count, index = 1, 0 + for idx, txt in enumerate(texts): + index += 1 + if scores[idx] < threshold or math.isnan(scores[idx]): + index -= 1 + continue + first_line = True + while str_count(txt) >= img_w // font_size - 4: + tmp = txt + txt = tmp[:img_w // font_size - 4] + if first_line: + new_txt = str(index) + ': ' + txt + first_line = False + else: + new_txt = ' ' + txt + draw_txt.text((0, gap * count), new_txt, txt_color, font=font) + txt = tmp[img_w // font_size - 4:] + if count >= img_h // gap - 1: + txt_img_list.append(np.array(blank_img)) + blank_img, draw_txt = create_blank_img() + count = 0 + count += 1 + if first_line: + new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) + else: + new_txt = " " + txt + " " + '%.3f' % (scores[idx]) + draw_txt.text((0, gap * count), new_txt, txt_color, font=font) + # whether add new blank img or not + if count >= img_h // gap - 1 and idx + 1 < len(texts): + txt_img_list.append(np.array(blank_img)) + blank_img, draw_txt = create_blank_img() + count = 0 + count += 1 + txt_img_list.append(np.array(blank_img)) + if len(txt_img_list) == 1: + blank_img = np.array(txt_img_list[0]) + else: + blank_img = np.concatenate(txt_img_list, axis=1) + return np.array(blank_img) + + +def str_count(s): + """ + Count the number of Chinese characters, + a single English character and a single number + equal to half the length of Chinese characters. + args: + s(string): the input of string + return(int): + the number of Chinese characters + """ + import string + count_zh = count_pu = 0 + s_len = len(s) + en_dg_count = 0 + for c in s: + if c in string.ascii_letters or c.isdigit() or c.isspace(): + en_dg_count += 1 + elif c.isalpha(): + count_zh += 1 + else: + count_pu += 1 + return s_len - math.ceil(en_dg_count / 2) + + +def resize_img(img, input_size=600): + img = np.array(img) + im_shape = img.shape + im_size_min = np.min(im_shape[0:2]) + im_size_max = np.max(im_shape[0:2]) + im_scale = float(input_size) / float(im_size_max) + im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) + return im + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" + + +def sorted_boxes(dt_boxes): + """ + Sort text boxes in order from top to bottom, left to right + args: + dt_boxes(array):detected text boxes with shape [4, 2] + return: + sorted boxes(array) with shape [4, 2] + """ + num_boxes = dt_boxes.shape[0] + sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) + _boxes = list(sorted_boxes) + + for i in range(num_boxes - 1): + if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ + (_boxes[i + 1][0][0] < _boxes[i][0][0]): + tmp = _boxes[i] + _boxes[i] = _boxes[i + 1] + _boxes[i + 1] = tmp + return _boxes + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data From 8f5e5177440dc61f424ed1d4c25901177eec589a Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 11 May 2022 08:55:28 +0000 Subject: [PATCH 008/117] fix --- .../text_recognition/ppocrv3_det_ch/module.py | 21 ++++++------- .../ppocrv3_det_ch/processor.py | 15 +++++----- .../ppocrv3_rec_ch/character.py | 7 +---- .../text_recognition/ppocrv3_rec_ch/module.py | 30 +++++++++++-------- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/modules/image/text_recognition/ppocrv3_det_ch/module.py b/modules/image/text_recognition/ppocrv3_det_ch/module.py index 675feb202..679681530 100644 --- 
a/modules/image/text_recognition/ppocrv3_det_ch/module.py +++ b/modules/image/text_recognition/ppocrv3_det_ch/module.py @@ -34,14 +34,14 @@ def base64_to_cv2(b64str): @moduleinfo( - name="ppocrv3_det_ch", + name="ch_pp-ocrv3_det", version="1.0.0", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") -class ChineseTextDetectionDB(hub.Module): +class ChPPOCRv3Det(hub.Module): def _initialize(self, enable_mkldnn=False): """ @@ -155,7 +155,8 @@ def detect_text(self, use_gpu=False, output_dir='detection_result', visualization=False, - box_thresh=0.5): + box_thresh=0.5, + det_db_unclip_ratio=1.5): """ Get the text box in the predicted images. Args: @@ -165,6 +166,7 @@ def detect_text(self, output_dir (str): The directory to store output images. visualization (bool): Whether to save image or not. box_thresh(float): the threshold of the detected text box's confidence + det_db_unclip_ratio(float): unclip ratio for post processing in DB detection. Returns: res (list): The result of text detection box and save path of images. """ @@ -195,7 +197,7 @@ def detect_text(self, 'thresh': 0.3, 'box_thresh': 0.6, 'max_candidates': 1000, - 'unclip_ratio': 1.5 + 'unclip_ratio': det_db_unclip_ratio }) all_imgs = [] @@ -204,7 +206,6 @@ def detect_text(self, for original_image in predicted_data: ori_im = original_image.copy() im, ratio_list = preprocessor(original_image) - print('after preprocess int det, shape{}'.format(im.shape)) res = {'save_path': ''} if im is None: res['data'] = [] @@ -222,15 +223,10 @@ def detect_text(self, outs_dict = {} outs_dict['maps'] = outputs[0] - # data_out = self.output_tensors[0].copy_to_cpu() - print('Outputs[0] in det, shape: {}'.format(outputs[0].shape)) dt_boxes_list = postprocessor(outs_dict, [ratio_list]) dt_boxes = dt_boxes_list[0] - print('after postprocess int det, shape{}'.format(dt_boxes.shape)) boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) - print('after fitler tag int det, shape{}'.format(boxes.shape)) res['data'] = boxes.astype(np.int).tolist() - print('boxes: {}'.format(boxes)) all_imgs.append(im) all_ratios.append(ratio_list) if visualization: @@ -278,6 +274,7 @@ def run_cmd(self, argvs): results = self.detect_text(paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, + det_db_unclip_ratio=args.det_db_unclip_ratio, visualization=args.visualization) return results @@ -297,6 +294,10 @@ def add_module_config_arg(self): type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--det_db_unclip_ratio', + type=float, + default=1.5, + help="unclip ratio for post processing in DB detection.") def add_module_input_arg(self): """ diff --git a/modules/image/text_recognition/ppocrv3_det_ch/processor.py b/modules/image/text_recognition/ppocrv3_det_ch/processor.py index a93aecef0..2de682548 100644 --- a/modules/image/text_recognition/ppocrv3_det_ch/processor.py +++ b/modules/image/text_recognition/ppocrv3_det_ch/processor.py @@ -25,7 +25,6 @@ def __init__(self, params): self.resize_type = 0 if 'test_image_shape' in params: self.image_shape = params['test_image_shape'] - # print(self.image_shape) self.resize_type = 1 if 'max_side_len' in params: self.max_side_len = params['max_side_len'] @@ -54,15 +53,14 @@ def resize_image_type0(self, img): resize_h = int(h * ratio) resize_w = int(w * ratio) - resize_h = 
int(round(resize_h / 32) * 32) - resize_w = int(round(resize_w / 32) * 32) + resize_h = max(int(round(resize_h / 32) * 32), 32) + resize_w = max(int(round(resize_w / 32) * 32), 32) try: if int(resize_w) <= 0 or int(resize_h) <= 0: return None, (None, None) img = cv2.resize(img, (int(resize_w), int(resize_h))) except: - print(img.shape, resize_w, resize_h) sys.exit(0) ratio_h = resize_h / float(h) ratio_w = resize_w / float(w) @@ -93,13 +91,14 @@ def normalize(self, im): return im def __call__(self, im): + src_h, src_w, _ = im.shape if self.resize_type == 0: im, (ratio_h, ratio_w) = self.resize_image_type0(im) else: im, (ratio_h, ratio_w) = self.resize_image_type1(im) im = self.normalize(im) im = im[np.newaxis, :] - return [im, (ratio_h, ratio_w)] + return [im, (src_h, src_w, ratio_h, ratio_w)] class DBPostProcess(object): @@ -228,7 +227,7 @@ def box_score_slow(self, bitmap, contour): cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - def __call__(self, outs_dict, ratio_list): + def __call__(self, outs_dict, shape_list): pred = outs_dict['maps'] pred = pred[:, 0, :, :] @@ -236,10 +235,10 @@ def __call__(self, outs_dict, ratio_list): boxes_batch = [] for batch_index in range(pred.shape[0]): - height, width = pred.shape[-2:] + src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] mask = segmentation[batch_index] - tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height) + tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, src_w, src_h) boxes_batch.append(tmp_boxes) return boxes_batch diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/character.py b/modules/image/text_recognition/ppocrv3_rec_ch/character.py index 3d40d7183..571f8f999 100644 --- a/modules/image/text_recognition/ppocrv3_rec_ch/character.py +++ b/modules/image/text_recognition/ppocrv3_rec_ch/character.py @@ -59,6 +59,7 @@ def __init__(self, config): self.character = dict_character def add_special_char(self, dict_character): + dict_character = ['blank'] + dict_character return dict_character def encode(self, text): @@ -93,12 +94,6 @@ def decode(self, text_index, text_prob=None, is_remove_duplicate=False): selection[1:] = text_index[batch_idx][1:] != text_index[batch_idx][:-1] for ignored_token in ignored_tokens: selection &= text_index[batch_idx] != ignored_token - # print(text_index) - # print(batch_idx) - # print(selection) - # for text_id in text_index[batch_idx][selection]: - # print(text_id) - # print(self.character[text_id]) char_list = [self.character[text_id] for text_id in text_index[batch_idx][selection]] if text_prob is not None: conf_list = text_prob[batch_idx][selection] diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/module.py b/modules/image/text_recognition/ppocrv3_rec_ch/module.py index 5df255fcf..402ac8a28 100644 --- a/modules/image/text_recognition/ppocrv3_rec_ch/module.py +++ b/modules/image/text_recognition/ppocrv3_rec_ch/module.py @@ -29,14 +29,14 @@ @moduleinfo( - name="ppocrv3_rec_ch", + name="ch_pp-ocrv3", version="1.0.0", summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \ based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. 
", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") -class ChineseOCRDBCRNN(hub.Module): +class ChPPOCRv3(hub.Module): def _initialize(self, text_detector_module=None, enable_mkldnn=False): """ @@ -51,7 +51,7 @@ def _initialize(self, text_detector_module=None, enable_mkldnn=False): 'use_space_char': True } self.char_ops = CharacterOps(char_ops_params) - self.rec_image_shape = [3, 32, 320] + self.rec_image_shape = [3, 48, 320] self._text_detector_module = text_detector_module self.font_file = os.path.join(self.directory, 'assets', 'simfang.ttf') self.enable_mkldnn = enable_mkldnn @@ -109,7 +109,7 @@ def text_detector_module(self): text detect module """ if not self._text_detector_module: - self._text_detector_module = hub.Module(name='ppocrv3_det_ch', + self._text_detector_module = hub.Module(name='ch_pp-ocrv3_det', enable_mkldnn=self.enable_mkldnn, version='1.0.0') return self._text_detector_module @@ -152,7 +152,7 @@ def get_rotate_crop_image(self, img, points): def resize_norm_img_rec(self, img, max_wh_ratio): imgC, imgH, imgW = self.rec_image_shape assert imgC == img.shape[2] - imgW = int((32 * max_wh_ratio)) + imgW = int((imgH * max_wh_ratio)) h, w = img.shape[:2] ratio = w / float(h) if math.ceil(imgH * ratio) > imgW: @@ -199,7 +199,8 @@ def recognize_text(self, visualization=False, box_thresh=0.5, text_thresh=0.5, - angle_classification_thresh=0.9): + angle_classification_thresh=0.9, + det_db_unclip_ratio=1.5): """ Get the chinese texts in the predicted images. Args: @@ -212,7 +213,7 @@ def recognize_text(self, box_thresh(float): the threshold of the detected text box's confidence text_thresh(float): the threshold of the chinese text recognition confidence angle_classification_thresh(float): the threshold of the angle classification confidence - + det_db_unclip_ratio(float): unclip ratio for post processing in DB detection. Returns: res (list): The result of chinese texts and save path of images. 
""" @@ -238,10 +239,10 @@ def recognize_text(self, detection_results = self.text_detector_module.detect_text(images=predicted_data, use_gpu=self.use_gpu, - box_thresh=box_thresh) + box_thresh=box_thresh, + det_db_unclip_ratio=det_db_unclip_ratio) boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] - print("dt_boxes num : {}".format(len(boxes[0]))) all_results = [] for index, img_boxes in enumerate(boxes): original_image = predicted_data[index].copy() @@ -255,7 +256,6 @@ def recognize_text(self, tmp_box = copy.deepcopy(boxes[num_box]) img_crop = self.get_rotate_crop_image(original_image, tmp_box) img_crop_list.append(img_crop) - print('img_crop shape {}'.format(img_crop.shape)) img_crop_list, angle_list = self._classify_text(img_crop_list, angle_classification_thresh=angle_classification_thresh) rec_results = self._recognize_text(img_crop_list) @@ -371,7 +371,8 @@ def _recognize_text(self, img_list): for beg_img_no in range(0, img_num, batch_num): end_img_no = min(img_num, beg_img_no + batch_num) norm_img_batch = [] - max_wh_ratio = 0 + imgC, imgH, imgW = self.rec_image_shape + max_wh_ratio = imgW / imgH for ino in range(beg_img_no, end_img_no): h, w = img_list[indices[ino]].shape[0:2] wh_ratio = w * 1.0 / h @@ -400,10 +401,8 @@ def _recognize_text(self, img_list): preds = preds[-1] if isinstance(preds, paddle.Tensor): preds = preds.numpy() - print('preds.shape: {}', preds.shape) preds_idx = preds.argmax(axis=2) preds_prob = preds.max(axis=2) - # print('preds_idx: {} \n preds_prob: {}'.format(preds_idx, preds_prob) ) rec_result = self.char_ops.decode(preds_idx, preds_prob, is_remove_duplicate=True) for rno in range(len(rec_result)): rec_res[indices[beg_img_no + rno]] = rec_result[rno] @@ -431,6 +430,7 @@ def run_cmd(self, argvs): results = self.recognize_text(paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, + det_db_unclip_ratio=args.det_db_unclip_ratio, visualization=args.visualization) return results @@ -450,6 +450,10 @@ def add_module_config_arg(self): type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--det_db_unclip_ratio', + type=float, + default=1.5, + help="unclip ratio for post processing in DB detection.") def add_module_input_arg(self): """ From ef8e55866b9838bc69a7001358aa8671100ea0a7 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 11 May 2022 08:56:55 +0000 Subject: [PATCH 009/117] rename --- .../text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/character.py | 0 .../text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/module.py | 0 .../text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/utils.py | 0 .../{ppocrv3_det_ch => ch_pp-ocrv3_det}/module.py | 0 .../{ppocrv3_det_ch => ch_pp-ocrv3_det}/processor.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename modules/image/text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/character.py (100%) rename modules/image/text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/module.py (100%) rename modules/image/text_recognition/{ppocrv3_rec_ch => ch_pp-ocrv3}/utils.py (100%) rename modules/image/text_recognition/{ppocrv3_det_ch => ch_pp-ocrv3_det}/module.py (100%) rename modules/image/text_recognition/{ppocrv3_det_ch => ch_pp-ocrv3_det}/processor.py (100%) diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/character.py b/modules/image/text_recognition/ch_pp-ocrv3/character.py similarity index 100% rename from modules/image/text_recognition/ppocrv3_rec_ch/character.py rename to 
modules/image/text_recognition/ch_pp-ocrv3/character.py diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/module.py b/modules/image/text_recognition/ch_pp-ocrv3/module.py similarity index 100% rename from modules/image/text_recognition/ppocrv3_rec_ch/module.py rename to modules/image/text_recognition/ch_pp-ocrv3/module.py diff --git a/modules/image/text_recognition/ppocrv3_rec_ch/utils.py b/modules/image/text_recognition/ch_pp-ocrv3/utils.py similarity index 100% rename from modules/image/text_recognition/ppocrv3_rec_ch/utils.py rename to modules/image/text_recognition/ch_pp-ocrv3/utils.py diff --git a/modules/image/text_recognition/ppocrv3_det_ch/module.py b/modules/image/text_recognition/ch_pp-ocrv3_det/module.py similarity index 100% rename from modules/image/text_recognition/ppocrv3_det_ch/module.py rename to modules/image/text_recognition/ch_pp-ocrv3_det/module.py diff --git a/modules/image/text_recognition/ppocrv3_det_ch/processor.py b/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py similarity index 100% rename from modules/image/text_recognition/ppocrv3_det_ch/processor.py rename to modules/image/text_recognition/ch_pp-ocrv3_det/processor.py From 42720de42fe963cd68cfb245e449a809c2ac7a8d Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 11 May 2022 09:54:35 +0000 Subject: [PATCH 010/117] add readme --- .../text_recognition/ch_pp-ocrv3/README.md | 180 ++++++++++++++++++ .../ch_pp-ocrv3_det/README.md | 172 +++++++++++++++++ 2 files changed, 352 insertions(+) create mode 100644 modules/image/text_recognition/ch_pp-ocrv3/README.md create mode 100644 modules/image/text_recognition/ch_pp-ocrv3_det/README.md diff --git a/modules/image/text_recognition/ch_pp-ocrv3/README.md b/modules/image/text_recognition/ch_pp-ocrv3/README.md new file mode 100644 index 000000000..0bf8f2156 --- /dev/null +++ b/modules/image/text_recognition/ch_pp-ocrv3/README.md @@ -0,0 +1,180 @@ +# ch_pp-ocrv3 + +|模型名称|ch_pp-ocrv3| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+SVTR_LCNet| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|模型大小|13M| +|最新更新日期|2022-05-11| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - [OCR文字识别场景在线体验](https://www.paddlepaddle.org.cn/hub/scene/ocr) + - 样例结果示例: +


+ +- ### 模型介绍 + + - PP-OCR是PaddleOCR自研的实用的超轻量OCR系统。在实现前沿算法的基础上,考虑精度与速度的平衡,进行模型瘦身和深度优化,使其尽可能满足产业落地需求。该系统包含文本检测和文本识别两个阶段,其中文本检测算法选用DB,文本识别算法选用CRNN,并在检测和识别模块之间添加文本方向分类器,以应对不同方向的文本识别。当前模块为PP-OCRv3,在PP-OCRv2的基础上,针对检测模型和识别模型,进行了共计9个方面的升级,进一步提升了模型效果。 +

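上面介绍的"检测 → 方向分类 → 识别"两阶段流程,对外统一通过 `recognize_text` 接口调用。下面给出一个按后文 API 说明整理的最小调用示意(图片路径为占位;`det_db_unclip_ratio` 为本次新增的检测框扩张比例参数,返回字段名以后文"返回"一节为准):

```python
import cv2
import paddlehub as hub

ocr = hub.Module(name="ch_pp-ocrv3")
results = ocr.recognize_text(
    images=[cv2.imread('/PATH/TO/IMAGE')],
    det_db_unclip_ratio=1.5,  # 检测框扩张比例,文本被截断时可适当调大
)

for res in results:
    print('结果图保存路径:', res.get('save_path', ''))
    for item in res['data']:
        print(item['text'], item['confidence'], item['text_box_position'])
```

若只需要文本框位置,可直接调用下文的 ch_pp-ocrv3_det 模块,其 `detect_text` 接口同样支持 `det_db_unclip_ratio` 参数。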

+ + - 更多详情参考:[PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.5/doc/doc_ch/PP-OCRv3_introduction.md)。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.7.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - shapely + + - pyclipper + + - ```shell + $ pip install shapely pyclipper + ``` + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + +- ### 2、安装 + + - ```shell + $ hub install ch_pp-ocrv3 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run ch_pp-ocrv3 --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="ch_pp-ocrv3", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + __init__(text_detector_module=None, enable_mkldnn=False) + ``` + + - 构造用于文本检测的模块 + + - **参数** + + - text_detector_module(str): 文字检测PaddleHub Module名字,如设置为None,则默认使用[ch_pp-ocrv3_det Module](../ch_pp-ocrv3_det/)。其作用为检测图片当中的文本。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + + + - ```python + def recognize_text(images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5, + angle_classification_thresh=0.9, + det_db_unclip_ratio=1.5) + ``` + + - 预测API,检测输入图片中的所有中文文本的位置。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - text\_thresh (float): 识别中文文本置信度的阈值; + - angle_classification_thresh(float): 文本角度分类置信度的阈值 + - visualization (bool): 是否将识别结果保存为图片文件; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - det\_db\_unclip\_ratio: 设置检测框的大小; + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标 + 如果无识别结果则data为\[\] + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m ch_pp-ocrv3 + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ch_pp-ocrv3" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install ch_pp-ocrv3==1.0.0 + ``` diff --git 
a/modules/image/text_recognition/ch_pp-ocrv3_det/README.md b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md new file mode 100644 index 000000000..a974bdd98 --- /dev/null +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md @@ -0,0 +1,172 @@ +# ch_pp-ocrv3_det + +|模型名称|ch_pp-ocrv3_det| +| :--- | :---: | +|类别|图像-文字检测| +|网络|Differentiable Binarization| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|模型大小|3.7MB| +|最新更新日期|2022-05-11| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +


+ +- ### 模型介绍 + + - DB(Differentiable Binarization)是一种基于分割的文本检测算法。此类算法可以更好地处理弯曲等不规则形状文本,因此检测效果往往会更好。但其后处理步骤中将分割结果转化为检测框的流程复杂,耗时严重。DB将二值化阈值加入训练中学习,可以获得更准确的检测边界,从而简化后处理流程。该Module是PP-OCRv3的检测模型,对PP-OCRv2中的CML(Collaborative Mutual Learning) 协同互学习文本检测蒸馏策略进行了升级。 + +

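模型介绍中提到的"将分割结果转化为检测框"一步,会按一定比例对分割出的文本多边形做外扩(unclip),本模块把该比例暴露为 `det_db_unclip_ratio` 参数。下面是一个基于 shapely 与 pyclipper 的最小示意,仅用于说明该参数的作用:假定外扩距离按"面积 × 比例 ÷ 周长"计算(常见 DB 后处理的做法),并非本模块源码:

```python
import numpy as np
import pyclipper
from shapely.geometry import Polygon


def unclip(box, unclip_ratio=1.5):
    """按 面积 * unclip_ratio / 周长 计算外扩距离,对文本框多边形做扩张。"""
    poly = Polygon(box)
    distance = poly.area * unclip_ratio / poly.length
    offset = pyclipper.PyclipperOffset()
    offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    return np.array(offset.Execute(distance)[0])


# 示例:同一个 100x20 的矩形文本框,比例越大外扩后的检测框越大
box = [(0, 0), (100, 0), (100, 20), (0, 20)]
print(unclip(box, unclip_ratio=1.5))
print(unclip(box, unclip_ratio=2.0))
```

实际使用时无需手动做这一步,在 `detect_text` 或 `recognize_text` 中传入 `det_db_unclip_ratio` 即可。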

+ + - 更多详情参考:[PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.5/doc/doc_ch/PP-OCRv3_introduction.md) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.7.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - shapely + + - pyclipper + + - ```shell + $ pip install shapely pyclipper + ``` + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + +- ### 2、安装 + + - ```shell + $ hub install ch_pp-ocrv3_det + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run ch_pp-ocrv3_det --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、代码示例 + + - ```python + import paddlehub as hub + import cv2 + + text_detector = hub.Module(name="ch_pp-ocrv3_det", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = text_detector.detect_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result =text_detector.detect_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + __init__(enable_mkldnn=False) + ``` + + - 构造检测模块的对象 + + - **参数** + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + + + - ```python + def detect_text(paths=[], + images=[], + use_gpu=False, + output_dir='detection_result', + box_thresh=0.5, + visualization=False, + det_db_unclip_ratio=1.5) + ``` + + - 预测API,检测输入图片中的所有中文文本的位置。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - visualization (bool): 是否将识别结果保存为图片文件; + - output\_dir (str): 图片的保存路径,默认设为 detection\_result; + - det\_db\_unclip\_ratio: 设置检测框的大小; + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list): 检测文本框结果,文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标 + - save_path (str): 识别结果的保存路径, 如不保存图片则save_path为'' + + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m ch_pp-ocrv3_det + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ch_pp-ocrv3_det" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install ch_pp-ocrv3_det==1.0.0 + ``` From d5724f1f09375fa817cc122833051de014683fd2 Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 12 May 2022 12:57:24 +0000 Subject: [PATCH 011/117] fix --- .../image/text_recognition/ch_pp-ocrv3/README.md | 13 ++----------- .../image/text_recognition/ch_pp-ocrv3/character.py | 2 +- .../image/text_recognition/ch_pp-ocrv3/module.py | 13 +++++++++++++ .../text_recognition/ch_pp-ocrv3/requirements.txt | 2 ++ 
modules/image/text_recognition/ch_pp-ocrv3/utils.py | 13 +++++++++++++ .../text_recognition/ch_pp-ocrv3_det/README.md | 13 ++----------- .../text_recognition/ch_pp-ocrv3_det/module.py | 13 +++++++++++++ .../text_recognition/ch_pp-ocrv3_det/processor.py | 13 +++++++++++++ .../ch_pp-ocrv3_det/requirements.txt | 2 ++ 9 files changed, 61 insertions(+), 23 deletions(-) mode change 100644 => 100755 modules/image/text_recognition/ch_pp-ocrv3/README.md create mode 100644 modules/image/text_recognition/ch_pp-ocrv3/requirements.txt mode change 100644 => 100755 modules/image/text_recognition/ch_pp-ocrv3_det/README.md create mode 100644 modules/image/text_recognition/ch_pp-ocrv3_det/requirements.txt diff --git a/modules/image/text_recognition/ch_pp-ocrv3/README.md b/modules/image/text_recognition/ch_pp-ocrv3/README.md old mode 100644 new mode 100755 index 0bf8f2156..ac3812a7d --- a/modules/image/text_recognition/ch_pp-ocrv3/README.md +++ b/modules/image/text_recognition/ch_pp-ocrv3/README.md @@ -35,18 +35,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 2.2 - - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - - - shapely - - - pyclipper - - - ```shell - $ pip install shapely pyclipper - ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - paddlehub >= 2.2 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 diff --git a/modules/image/text_recognition/ch_pp-ocrv3/character.py b/modules/image/text_recognition/ch_pp-ocrv3/character.py index 571f8f999..eaa45a581 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3/character.py +++ b/modules/image/text_recognition/ch_pp-ocrv3/character.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/modules/image/text_recognition/ch_pp-ocrv3/module.py b/modules/image/text_recognition/ch_pp-ocrv3/module.py index 402ac8a28..133f8d785 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3/module.py +++ b/modules/image/text_recognition/ch_pp-ocrv3/module.py @@ -1,4 +1,17 @@ # -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import argparse import ast import copy diff --git a/modules/image/text_recognition/ch_pp-ocrv3/requirements.txt b/modules/image/text_recognition/ch_pp-ocrv3/requirements.txt new file mode 100644 index 000000000..7159e62c0 --- /dev/null +++ b/modules/image/text_recognition/ch_pp-ocrv3/requirements.txt @@ -0,0 +1,2 @@ +shapely +pyclipper diff --git a/modules/image/text_recognition/ch_pp-ocrv3/utils.py b/modules/image/text_recognition/ch_pp-ocrv3/utils.py index 18aa6ceec..d6309f2f5 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3/utils.py +++ b/modules/image/text_recognition/ch_pp-ocrv3/utils.py @@ -1,4 +1,17 @@ # -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/README.md b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md old mode 100644 new mode 100755 index a974bdd98..d1bf63bfb --- a/modules/image/text_recognition/ch_pp-ocrv3_det/README.md +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md @@ -35,18 +35,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 2.2 - - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - - - shapely - - - pyclipper - - - ```shell - $ pip install shapely pyclipper - ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - paddlehub >= 2.2 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/module.py b/modules/image/text_recognition/ch_pp-ocrv3_det/module.py index 679681530..52d50a60e 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3_det/module.py +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/module.py @@ -1,4 +1,17 @@ # -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py b/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py index 2de682548..46a3b2638 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py @@ -1,4 +1,17 @@ # -*- coding:utf-8 -*- +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/requirements.txt b/modules/image/text_recognition/ch_pp-ocrv3_det/requirements.txt new file mode 100644 index 000000000..7159e62c0 --- /dev/null +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/requirements.txt @@ -0,0 +1,2 @@ +shapely +pyclipper From e0c027f34acd27794a483509cdddeadf1ac4a504 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 23 May 2022 08:20:45 +0000 Subject: [PATCH 012/117] add pp_tinypose --- .../keypoint_detection/pp-tinypose/README.md | 136 ++++ .../pp-tinypose/__init__.py | 5 + .../pp-tinypose/benchmark_utils.py | 262 +++++++ .../pp-tinypose/det_keypoint_unite_infer.py | 230 ++++++ .../pp-tinypose/det_keypoint_unite_utils.py | 86 +++ .../keypoint_detection/pp-tinypose/infer.py | 694 ++++++++++++++++++ .../pp-tinypose/keypoint_infer.py | 381 ++++++++++ .../pp-tinypose/keypoint_postprocess.py | 192 +++++ .../pp-tinypose/keypoint_preprocess.py | 232 ++++++ .../keypoint_detection/pp-tinypose/logger.py | 68 ++ .../keypoint_detection/pp-tinypose/module.py | 148 ++++ .../pp-tinypose/preprocess.py | 332 +++++++++ .../keypoint_detection/pp-tinypose/utils.py | 217 ++++++ .../pp-tinypose/visualize.py | 208 ++++++ 14 files changed, 3191 insertions(+) create mode 100644 modules/image/keypoint_detection/pp-tinypose/README.md create mode 100644 modules/image/keypoint_detection/pp-tinypose/__init__.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/infer.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/keypoint_postprocess.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/keypoint_preprocess.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/logger.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/module.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/preprocess.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/utils.py create mode 100644 modules/image/keypoint_detection/pp-tinypose/visualize.py diff 
--git a/modules/image/keypoint_detection/pp-tinypose/README.md b/modules/image/keypoint_detection/pp-tinypose/README.md new file mode 100644 index 000000000..7c11c495c --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/README.md @@ -0,0 +1,136 @@ +# pp-tinypose + +|模型名称|pp-tinypose| +| :--- | :---: | +|类别|图像-关键点检测| +|网络|PicoDet + HRNet| +|数据集|COCO + AI Challenger| +|是否支持Fine-tuning|否| +|模型大小|125M| +|最新更新日期|2022-05-20| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入图像
+ 输出图像 + +- ### 模型介绍 + + - PP-TinyPose是PaddleDetecion针对移动端设备优化的实时关键点检测模型,可流畅地在移动端设备上执行多人姿态估计任务。借助PaddleDetecion自研的优秀轻量级检测模型PicoDet以及轻量级姿态估计任务骨干网络HRNet, 结合多种策略有效平衡了模型的速度和精度表现。 + + - 更多详情参考:[PP-TinyPose](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/keypoint/tiny_pose)。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2 + + - paddlehub >= 2.2 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install pp-tinypose + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pp-tinypose --input_path "/PATH/TO/IMAGE" --visualization True --use_gpu + ``` + - 通过命令行方式实现关键点检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="pp-tinypose") + result = model.predict('/PATH/TO/IMAGE', save_path='pp_tinypose_output', visualization=True, use_gpu=True) + ``` + +- ### 3、API + + + - ```python + def predict(self, img: Union[str, np.ndarray], save_path: str = "pp_tinypose_output", visualization: bool = True, use_gpu = False) + ``` + + - 预测API,识别输入图片中的所有人肢体关键点。 + + - **参数** + + - img (numpy.ndarray|str): 图片数据,使用图片路径或者输入numpy.ndarray,BGR格式; + - save_path (str): 图片保存路径, 默认为'pp_tinypose_output'; + - visualization (bool): 是否将识别结果保存为图片文件; + - use_gpu: 是否使用gpu; + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表元素依然为列表,存的内容为[图像名称,检测框,关键点]。 + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个关键点检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pp-tinypose + ``` + + - 这样就完成了一个关键点检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pp-tinypose" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pp-tinypose==1.0.0 + ``` diff --git a/modules/image/keypoint_detection/pp-tinypose/__init__.py b/modules/image/keypoint_detection/pp-tinypose/__init__.py new file mode 100644 index 000000000..55916b319 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/__init__.py @@ -0,0 +1,5 @@ +import os +import sys + +CUR_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(CUR_DIR) diff --git a/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py b/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py new file mode 100644 index 000000000..e1dd4ec35 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py @@ -0,0 +1,262 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import os +from pathlib import Path + +import paddle +import paddle.inference as paddle_infer + +CUR_DIR = os.path.dirname(os.path.abspath(__file__)) +LOG_PATH_ROOT = f"{CUR_DIR}/../../output" + + +class PaddleInferBenchmark(object): + + def __init__(self, + config, + model_info: dict = {}, + data_info: dict = {}, + perf_info: dict = {}, + resource_info: dict = {}, + **kwargs): + """ + Construct PaddleInferBenchmark Class to format logs. + args: + config(paddle.inference.Config): paddle inference config + model_info(dict): basic model info + {'model_name': 'resnet50' + 'precision': 'fp32'} + data_info(dict): input data info + {'batch_size': 1 + 'shape': '3,224,224' + 'data_num': 1000} + perf_info(dict): performance result + {'preprocess_time_s': 1.0 + 'inference_time_s': 2.0 + 'postprocess_time_s': 1.0 + 'total_time_s': 4.0} + resource_info(dict): + cpu and gpu resources + {'cpu_rss': 100 + 'gpu_rss': 100 + 'gpu_util': 60} + """ + # PaddleInferBenchmark Log Version + self.log_version = "1.0.3" + + # Paddle Version + self.paddle_version = paddle.__version__ + self.paddle_commit = paddle.__git_commit__ + paddle_infer_info = paddle_infer.get_version() + self.paddle_branch = paddle_infer_info.strip().split(': ')[-1] + + # model info + self.model_info = model_info + + # data info + self.data_info = data_info + + # perf info + self.perf_info = perf_info + + try: + # required value + self.model_name = model_info['model_name'] + self.precision = model_info['precision'] + + self.batch_size = data_info['batch_size'] + self.shape = data_info['shape'] + self.data_num = data_info['data_num'] + + self.inference_time_s = round(perf_info['inference_time_s'], 4) + except: + self.print_help() + raise ValueError("Set argument wrong, please check input argument and its type") + + self.preprocess_time_s = perf_info.get('preprocess_time_s', 0) + self.postprocess_time_s = perf_info.get('postprocess_time_s', 0) + self.with_tracker = True if 'tracking_time_s' in perf_info else False + self.tracking_time_s = perf_info.get('tracking_time_s', 0) + self.total_time_s = perf_info.get('total_time_s', 0) + + self.inference_time_s_90 = perf_info.get("inference_time_s_90", "") + self.inference_time_s_99 = perf_info.get("inference_time_s_99", "") + self.succ_rate = perf_info.get("succ_rate", "") + self.qps = perf_info.get("qps", "") + + # conf info + self.config_status = self.parse_config(config) + + # mem info + if isinstance(resource_info, dict): + self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0)) + self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0)) + self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0)) + self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0)) + self.cpu_util = round(resource_info.get('cpu_util', 0), 2) + + self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0)) + self.gpu_util = round(resource_info.get('gpu_util', 0), 2) + self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2) + else: + self.cpu_rss_mb = 0 + self.cpu_vms_mb = 0 + self.cpu_shared_mb = 0 + self.cpu_dirty_mb = 0 + self.cpu_util = 0 + + self.gpu_rss_mb = 0 + self.gpu_util 
= 0 + self.gpu_mem_util = 0 + + # init benchmark logger + self.benchmark_logger() + + def benchmark_logger(self): + """ + benchmark logger + """ + # remove other logging handler + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # Init logger + FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log" + Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True) + logging.basicConfig(level=logging.INFO, + format=FORMAT, + handlers=[ + logging.FileHandler(filename=log_output, mode='w'), + logging.StreamHandler(), + ]) + self.logger = logging.getLogger(__name__) + self.logger.info(f"Paddle Inference benchmark log will be saved to {log_output}") + + def parse_config(self, config) -> dict: + """ + parse paddle predictor config + args: + config(paddle.inference.Config): paddle inference config + return: + config_status(dict): dict style config info + """ + if isinstance(config, paddle_infer.Config): + config_status = {} + config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu" + config_status['ir_optim'] = config.ir_optim() + config_status['enable_tensorrt'] = config.tensorrt_engine_enabled() + config_status['precision'] = self.precision + config_status['enable_mkldnn'] = config.mkldnn_enabled() + config_status['cpu_math_library_num_threads'] = config.cpu_math_library_num_threads() + elif isinstance(config, dict): + config_status['runtime_device'] = config.get('runtime_device', "") + config_status['ir_optim'] = config.get('ir_optim', "") + config_status['enable_tensorrt'] = config.get('enable_tensorrt', "") + config_status['precision'] = config.get('precision', "") + config_status['enable_mkldnn'] = config.get('enable_mkldnn', "") + config_status['cpu_math_library_num_threads'] = config.get('cpu_math_library_num_threads', "") + else: + self.print_help() + raise ValueError("Set argument config wrong, please check input argument and its type") + return config_status + + def report(self, identifier=None): + """ + print log report + args: + identifier(string): identify log + """ + if identifier: + identifier = f"[{identifier}]" + else: + identifier = "" + + self.logger.info("\n") + self.logger.info("---------------------- Paddle info ----------------------") + self.logger.info(f"{identifier} paddle_version: {self.paddle_version}") + self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}") + self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}") + self.logger.info(f"{identifier} log_api_version: {self.log_version}") + self.logger.info("----------------------- Conf info -----------------------") + self.logger.info(f"{identifier} runtime_device: {self.config_status['runtime_device']}") + self.logger.info(f"{identifier} ir_optim: {self.config_status['ir_optim']}") + self.logger.info(f"{identifier} enable_memory_optim: {True}") + self.logger.info(f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}") + self.logger.info(f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}") + self.logger.info( + f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}") + self.logger.info("----------------------- Model info ----------------------") + self.logger.info(f"{identifier} model_name: {self.model_name}") + self.logger.info(f"{identifier} precision: {self.precision}") + self.logger.info("----------------------- Data info -----------------------") + self.logger.info(f"{identifier} batch_size: 
{self.batch_size}") + self.logger.info(f"{identifier} input_shape: {self.shape}") + self.logger.info(f"{identifier} data_num: {self.data_num}") + self.logger.info("----------------------- Perf info -----------------------") + self.logger.info( + f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%" + ) + self.logger.info( + f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%" + ) + self.logger.info(f"{identifier} total time spent(s): {self.total_time_s}") + + if self.with_tracker: + self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, " + f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, " + f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}, " + f"tracking_time(ms): {round(self.tracking_time_s*1000, 1)}") + else: + self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, " + f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, " + f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}") + if self.inference_time_s_90: + self.looger.info( + f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}" + ) + if self.qps: + self.logger.info(f"{identifier} QPS: {self.qps}") + + def print_help(self): + """ + print function help + """ + print("""Usage: + ==== Print inference benchmark logs. ==== + config = paddle.inference.Config() + model_info = {'model_name': 'resnet50' + 'precision': 'fp32'} + data_info = {'batch_size': 1 + 'shape': '3,224,224' + 'data_num': 1000} + perf_info = {'preprocess_time_s': 1.0 + 'inference_time_s': 2.0 + 'postprocess_time_s': 1.0 + 'total_time_s': 4.0} + resource_info = {'cpu_rss_mb': 100 + 'gpu_rss_mb': 100 + 'gpu_util': 60} + log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info) + log('Test') + """) + + def __call__(self, identifier=None): + """ + __call__ + args: + identifier(string): identify log + """ + self.report(identifier) diff --git a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py new file mode 100644 index 000000000..612f6dd51 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py @@ -0,0 +1,230 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json +import math +import os + +import cv2 +import numpy as np +import paddle +import yaml +from benchmark_utils import PaddleInferBenchmark +from det_keypoint_unite_utils import argsparser +from infer import bench_log +from infer import Detector +from infer import get_test_images +from infer import PredictConfig +from infer import print_arguments +from keypoint_infer import KeyPointDetector +from keypoint_infer import PredictConfig_KeyPoint +from keypoint_postprocess import translate_to_ori_images +from preprocess import decode_image +from utils import get_current_memory_mb +from visualize import visualize_pose + +KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'} + + +def predict_with_given_det(image, det_res, keypoint_detector, keypoint_batch_size, run_benchmark): + rec_images, records, det_rects = keypoint_detector.get_person_from_rect(image, det_res) + keypoint_vector = [] + score_vector = [] + + rect_vector = det_rects + keypoint_results = keypoint_detector.predict_image(rec_images, run_benchmark, repeats=10, visual=False) + keypoint_vector, score_vector = translate_to_ori_images(keypoint_results, np.array(records)) + keypoint_res = {} + keypoint_res['keypoint'] = [keypoint_vector.tolist(), score_vector.tolist()] if len(keypoint_vector) > 0 else [[], + []] + keypoint_res['bbox'] = rect_vector + return keypoint_res + + +def topdown_unite_predict(detector, topdown_keypoint_detector, image_list, keypoint_batch_size=1, save_res=False): + det_timer = detector.get_timer() + store_res = [] + for i, img_file in enumerate(image_list): + # Decode image in advance in det + pose prediction + det_timer.preprocess_time_s.start() + image, _ = decode_image(img_file, {}) + det_timer.preprocess_time_s.end() + + if FLAGS.run_benchmark: + results = detector.predict_image([image], run_benchmark=True, repeats=10) + + cm, gm, gu = get_current_memory_mb() + detector.cpu_mem += cm + detector.gpu_mem += gm + detector.gpu_util += gu + else: + results = detector.predict_image([image], visual=False) + results = detector.filter_box(results, FLAGS.det_threshold) + if results['boxes_num'] > 0: + keypoint_res = predict_with_given_det(image, results, topdown_keypoint_detector, keypoint_batch_size, + FLAGS.run_benchmark) + + if save_res: + save_name = img_file if isinstance(img_file, str) else i + store_res.append( + [save_name, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]]) + else: + results["keypoint"] = [[], []] + keypoint_res = results + if FLAGS.run_benchmark: + cm, gm, gu = get_current_memory_mb() + topdown_keypoint_detector.cpu_mem += cm + topdown_keypoint_detector.gpu_mem += gm + topdown_keypoint_detector.gpu_util += gu + else: + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + visualize_pose(img_file, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, save_dir=FLAGS.output_dir) + if save_res: + """ + 1) store_res: a list of image_data + 2) image_data: [imageid, rects, [keypoints, scores]] + 3) rects: list of rect [xmin, ymin, xmax, ymax] + 4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list + 5) scores: mean of all joint conf + """ + with open("det_keypoint_unite_image_results.json", 'w') as wf: + json.dump(store_res, wf, indent=4) + + +def topdown_unite_predict_video(detector, topdown_keypoint_detector, camera_id, keypoint_batch_size=1, save_res=False): + video_name = 'output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = 
cv2.VideoCapture(FLAGS.video_file) + video_name = os.path.split(FLAGS.video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + out_path = os.path.join(FLAGS.output_dir, video_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + index = 0 + store_res = [] + while (1): + ret, frame = capture.read() + if not ret: + break + index += 1 + print('detect frame: %d' % (index)) + + frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + results = detector.predict_image([frame2], visual=False) + results = detector.filter_box(results, FLAGS.det_threshold) + if results['boxes_num'] == 0: + writer.write(frame) + continue + + keypoint_res = predict_with_given_det(frame2, results, topdown_keypoint_detector, keypoint_batch_size, + FLAGS.run_benchmark) + + im = visualize_pose(frame, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, returnimg=True) + if save_res: + store_res.append([index, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]]) + + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + writer.release() + print('output_video saved to: {}'.format(out_path)) + if save_res: + """ + 1) store_res: a list of frame_data + 2) frame_data: [frameid, rects, [keypoints, scores]] + 3) rects: list of rect [xmin, ymin, xmax, ymax] + 4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list + 5) scores: mean of all joint conf + """ + with open("det_keypoint_unite_video_results.json", 'w') as wf: + json.dump(store_res, wf, indent=4) + + +def main(): + deploy_file = os.path.join(FLAGS.det_model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + arch = yml_conf['arch'] + detector = Detector(FLAGS.det_model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn, + threshold=FLAGS.det_threshold) + + topdown_keypoint_detector = KeyPointDetector(FLAGS.keypoint_model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.keypoint_batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn, + use_dark=FLAGS.use_dark) + keypoint_arch = topdown_keypoint_detector.pred_config.arch + assert KEYPOINT_SUPPORT_MODELS[ + keypoint_arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.' 
+ + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + topdown_unite_predict_video(detector, topdown_keypoint_detector, FLAGS.camera_id, FLAGS.keypoint_batch_size, + FLAGS.save_res) + else: + # predict from image + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + topdown_unite_predict(detector, topdown_keypoint_detector, img_list, FLAGS.keypoint_batch_size, FLAGS.save_res) + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + topdown_keypoint_detector.det_times.info(average=True) + else: + mode = FLAGS.run_mode + det_model_dir = FLAGS.det_model_dir + det_model_info = {'model_name': det_model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} + bench_log(detector, img_list, det_model_info, name='Det') + keypoint_model_dir = FLAGS.keypoint_model_dir + keypoint_model_info = { + 'model_name': keypoint_model_dir.strip('/').split('/')[-1], + 'precision': mode.split('_')[-1] + } + bench_log(topdown_keypoint_detector, img_list, keypoint_model_info, FLAGS.keypoint_batch_size, 'KeyPoint') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" + + main() diff --git a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py new file mode 100644 index 000000000..309c80814 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py @@ -0,0 +1,86 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import ast + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--det_model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py."), + required=True) + parser.add_argument("--keypoint_model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py."), + required=True) + parser.add_argument("--image_file", type=str, default=None, help="Path of image file.") + parser.add_argument("--image_dir", + type=str, + default=None, + help="Dir of image file, `image_file` has a higher priority.") + parser.add_argument("--keypoint_batch_size", + type=int, + default=8, + help=("batch_size for keypoint inference. In detection-keypoint unit" + "inference, the batch size in detection is 1. 
Then collate det " + "result in batch for keypoint inference.")) + parser.add_argument("--video_file", + type=str, + default=None, + help="Path of video file, `video_file` or `camera_id` has a highest priority.") + parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.") + parser.add_argument("--det_threshold", type=float, default=0.5, help="Threshold of score.") + parser.add_argument("--keypoint_threshold", type=float, default=0.5, help="Threshold of score.") + parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.") + parser.add_argument("--run_mode", + type=str, + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") + parser.add_argument("--device", + type=str, + default='cpu', + help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.") + parser.add_argument("--run_benchmark", + type=ast.literal_eval, + default=False, + help="Whether to predict a image_file repeatedly for benchmark") + parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.") + parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.") + parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.") + parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.") + parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.") + parser.add_argument("--trt_calib_mode", + type=bool, + default=False, + help="If the model is produced by TRT offline quantitative " + "calibration, trt_calib_mode need to set True.") + parser.add_argument('--use_dark', + type=ast.literal_eval, + default=True, + help='whether to use darkpose to get better keypoint position predict ') + parser.add_argument('--save_res', + type=bool, + default=False, + help=("whether to save predict results to json file" + "1) store_res: a list of image_data" + "2) image_data: [imageid, rects, [keypoints, scores]]" + "3) rects: list of rect [xmin, ymin, xmax, ymax]" + "4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list" + "5) scores: mean of all joint conf")) + return parser diff --git a/modules/image/keypoint_detection/pp-tinypose/infer.py b/modules/image/keypoint_detection/pp-tinypose/infer.py new file mode 100644 index 000000000..fe0764e97 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/infer.py @@ -0,0 +1,694 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import glob +import json +import math +import os +import sys +from functools import reduce +from pathlib import Path + +import cv2 +import numpy as np +import paddle +import yaml +from benchmark_utils import PaddleInferBenchmark +from keypoint_preprocess import EvalAffine +from keypoint_preprocess import expand_crop +from keypoint_preprocess import TopDownEvalAffine +from paddle.inference import Config +from paddle.inference import create_predictor +from preprocess import decode_image +from preprocess import LetterBoxResize +from preprocess import NormalizeImage +from preprocess import Pad +from preprocess import PadStride +from preprocess import Permute +from preprocess import preprocess +from preprocess import Resize +from preprocess import WarpAffine +from utils import argsparser +from utils import get_current_memory_mb +from utils import Timer +from visualize import visualize_box + +# Global dictionary +SUPPORT_MODELS = { + 'YOLO', + 'RCNN', + 'SSD', + 'Face', + 'FCOS', + 'SOLOv2', + 'TTFNet', + 'S2ANet', + 'JDE', + 'FairMOT', + 'DeepSORT', + 'GFL', + 'PicoDet', + 'CenterNet', + 'TOOD', + 'RetinaNet', + 'StrongBaseline', + 'STGCN', + 'YOLOX', +} + + +def bench_log(detector, img_list, model_info, batch_size=1, name=None): + mems = { + 'cpu_rss_mb': detector.cpu_mem / len(img_list), + 'gpu_rss_mb': detector.gpu_mem / len(img_list), + 'gpu_util': detector.gpu_util * 100 / len(img_list) + } + perf_info = detector.det_times.report(average=True) + data_info = {'batch_size': batch_size, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']} + log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems) + log(name) + + +class Detector(object): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + enable_mkldnn_bfloat16 (bool): whether to turn on mkldnn bfloat16 + output_dir (str): The path of output + threshold (float): The threshold of score for visualization + delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT. + Used by action model. 
+ """ + + def __init__(self, + model_dir, + device='CPU', + run_mode='paddle', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False, + enable_mkldnn_bfloat16=False, + output_dir='output', + threshold=0.5, + delete_shuffle_pass=False): + self.pred_config = self.set_config(model_dir) + self.device = device + self.predictor, self.config = load_predictor(model_dir, + run_mode=run_mode, + batch_size=batch_size, + min_subgraph_size=self.pred_config.min_subgraph_size, + device=device, + use_dynamic_shape=self.pred_config.use_dynamic_shape, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn, + enable_mkldnn_bfloat16=enable_mkldnn_bfloat16, + delete_shuffle_pass=delete_shuffle_pass) + self.det_times = Timer() + self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 + self.batch_size = batch_size + self.output_dir = output_dir + self.threshold = threshold + + def set_config(self, model_dir): + return PredictConfig(model_dir) + + def preprocess(self, image_list): + preprocess_ops = [] + for op_info in self.pred_config.preprocess_infos: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + + input_im_lst = [] + input_im_info_lst = [] + for im_path in image_list: + im, im_info = preprocess(im_path, preprocess_ops) + input_im_lst.append(im) + input_im_info_lst.append(im_info) + inputs = create_inputs(input_im_lst, input_im_info_lst) + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + return inputs + + def postprocess(self, inputs, result): + # postprocess output of predictor + np_boxes_num = result['boxes_num'] + if np_boxes_num[0] <= 0: + print('[WARNNING] No object detected.') + result = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]} + result = {k: v for k, v in result.items() if v is not None} + return result + + def filter_box(self, result, threshold): + np_boxes_num = result['boxes_num'] + boxes = result['boxes'] + start_idx = 0 + filter_boxes = [] + filter_num = [] + for i in range(len(np_boxes_num)): + boxes_num = np_boxes_num[i] + boxes_i = boxes[start_idx:start_idx + boxes_num, :] + idx = boxes_i[:, 1] > threshold + filter_boxes_i = boxes_i[idx, :] + filter_boxes.append(filter_boxes_i) + filter_num.append(filter_boxes_i.shape[0]) + start_idx += boxes_num + boxes = np.concatenate(filter_boxes) + filter_num = np.array(filter_num) + filter_res = {'boxes': boxes, 'boxes_num': filter_num} + return filter_res + + def predict(self, repeats=1): + ''' + Args: + repeats (int): repeats number for prediction + Returns: + result (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + MaskRCNN's result include 'masks': np.ndarray: + shape: [N, im_h, im_w] + ''' + # model prediction + np_boxes, np_masks = None, None + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + boxes_num = self.predictor.get_output_handle(output_names[1]) + np_boxes_num = boxes_num.copy_to_cpu() + if self.pred_config.mask: + masks_tensor = 
self.predictor.get_output_handle(output_names[2]) + np_masks = masks_tensor.copy_to_cpu() + result = dict(boxes=np_boxes, masks=np_masks, boxes_num=np_boxes_num) + return result + + def merge_batch_result(self, batch_result): + if len(batch_result) == 1: + return batch_result[0] + res_key = batch_result[0].keys() + results = {k: [] for k in res_key} + for res in batch_result: + for k, v in res.items(): + results[k].append(v) + for k, v in results.items(): + if k != 'masks': + results[k] = np.concatenate(v) + return results + + def get_timer(self): + return self.det_times + + def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True, save_file=None): + batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size) + results = [] + for i in range(batch_loop_cnt): + start_index = i * self.batch_size + end_index = min((i + 1) * self.batch_size, len(image_list)) + batch_image_list = image_list[start_index:end_index] + if run_benchmark: + # preprocess + inputs = self.preprocess(batch_image_list) # warmup + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + result = self.predict(repeats=50) # warmup + self.det_times.inference_time_s.start() + result = self.predict(repeats=repeats) + self.det_times.inference_time_s.end(repeats=repeats) + + # postprocess + result_warmup = self.postprocess(inputs, result) # warmup + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + cm, gm, gu = get_current_memory_mb() + self.cpu_mem += cm + self.gpu_mem += gm + self.gpu_util += gu + else: + # preprocess + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + # postprocess + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + if visual: + visualize(batch_image_list, + result, + self.pred_config.labels, + output_dir=self.output_dir, + threshold=self.threshold) + + results.append(result) + if visual: + print('Test iter {}'.format(i)) + + if save_file is not None: + Path(self.output_dir).mkdir(exist_ok=True) + self.format_coco_results(image_list, results, save_file=save_file) + + results = self.merge_batch_result(results) + return results + + def predict_video(self, video_file, camera_id): + video_out_name = 'output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = cv2.VideoCapture(video_file) + video_out_name = os.path.split(video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + out_path = os.path.join(self.output_dir, video_out_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + index = 1 + while (1): + ret, frame = capture.read() + if not ret: + break + print('detect frame: %d' % 
(index)) + index += 1 + results = self.predict_image([frame[:, :, ::-1]], visual=False) + + im = visualize_box(frame, results, self.pred_config.labels, threshold=self.threshold) + im = np.array(im) + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + writer.release() + + @staticmethod + def format_coco_results(image_list, results, save_file=None): + coco_results = [] + image_id = 0 + + for result in results: + start_idx = 0 + for box_num in result['boxes_num']: + idx_slice = slice(start_idx, start_idx + box_num) + start_idx += box_num + + image_file = image_list[image_id] + image_id += 1 + + if 'boxes' in result: + boxes = result['boxes'][idx_slice, :] + per_result = [ + { + 'image_file': image_file, + 'bbox': [box[2], box[3], box[4] - box[2], box[5] - box[3]], # xyxy -> xywh + 'score': box[1], + 'category_id': int(box[0]), + } for k, box in enumerate(boxes.tolist()) + ] + + elif 'segm' in result: + import pycocotools.mask as mask_util + + scores = result['score'][idx_slice].tolist() + category_ids = result['label'][idx_slice].tolist() + segms = result['segm'][idx_slice, :] + rles = [ + mask_util.encode(np.array(mask[:, :, np.newaxis], dtype=np.uint8, order='F'))[0] + for mask in segms + ] + for rle in rles: + rle['counts'] = rle['counts'].decode('utf-8') + + per_result = [{ + 'image_file': image_file, + 'segmentation': rle, + 'score': scores[k], + 'category_id': category_ids[k], + } for k, rle in enumerate(rles)] + + else: + raise RuntimeError('') + + # per_result = [item for item in per_result if item['score'] > threshold] + coco_results.extend(per_result) + + if save_file: + with open(os.path.join(save_file), 'w') as f: + json.dump(coco_results, f) + + return coco_results + + +def create_inputs(imgs, im_info): + """generate input for different model type + Args: + imgs (list(numpy)): list of images (np.ndarray) + im_info (list(dict)): list of image info + Returns: + inputs (dict): input of model + """ + inputs = {} + + im_shape = [] + scale_factor = [] + if len(imgs) == 1: + inputs['image'] = np.array((imgs[0], )).astype('float32') + inputs['im_shape'] = np.array((im_info[0]['im_shape'], )).astype('float32') + inputs['scale_factor'] = np.array((im_info[0]['scale_factor'], )).astype('float32') + return inputs + + for e in im_info: + im_shape.append(np.array((e['im_shape'], )).astype('float32')) + scale_factor.append(np.array((e['scale_factor'], )).astype('float32')) + + inputs['im_shape'] = np.concatenate(im_shape, axis=0) + inputs['scale_factor'] = np.concatenate(scale_factor, axis=0) + + imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs] + max_shape_h = max([e[0] for e in imgs_shape]) + max_shape_w = max([e[1] for e in imgs_shape]) + padding_imgs = [] + for img in imgs: + im_c, im_h, im_w = img.shape[:] + padding_im = np.zeros((im_c, max_shape_h, max_shape_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = img + padding_imgs.append(padding_im) + inputs['image'] = np.stack(padding_imgs, axis=0) + return inputs + + +class PredictConfig(): + """set config of preprocess, postprocess and visualize + Args: + model_dir (str): root path of model.yml + """ + + def __init__(self, model_dir): + # parsing Yaml config for Preprocess + deploy_file = os.path.join(model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + self.check_model(yml_conf) + self.arch = yml_conf['arch'] + self.preprocess_infos = yml_conf['Preprocess'] + self.min_subgraph_size = yml_conf['min_subgraph_size'] + 
self.labels = yml_conf['label_list'] + self.mask = False + self.use_dynamic_shape = yml_conf['use_dynamic_shape'] + if 'mask' in yml_conf: + self.mask = yml_conf['mask'] + self.tracker = None + if 'tracker' in yml_conf: + self.tracker = yml_conf['tracker'] + if 'NMS' in yml_conf: + self.nms = yml_conf['NMS'] + if 'fpn_stride' in yml_conf: + self.fpn_stride = yml_conf['fpn_stride'] + if self.arch == 'RCNN' and yml_conf.get('export_onnx', False): + print('The RCNN export model is used for ONNX and it only supports batch_size = 1') + self.print_config() + + def check_model(self, yml_conf): + """ + Raises: + ValueError: loaded model not in supported model type + """ + for support_model in SUPPORT_MODELS: + if support_model in yml_conf['arch']: + return True + raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf['arch'], SUPPORT_MODELS)) + + def print_config(self): + print('----------- Model Configuration -----------') + print('%s: %s' % ('Model Arch', self.arch)) + print('%s: ' % ('Transform Order')) + for op_info in self.preprocess_infos: + print('--%s: %s' % ('transform op', op_info['type'])) + print('--------------------------------------------') + + +def load_predictor(model_dir, + run_mode='paddle', + batch_size=1, + device='CPU', + min_subgraph_size=3, + use_dynamic_shape=False, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False, + enable_mkldnn_bfloat16=False, + delete_shuffle_pass=False): + """set AnalysisConfig, generate AnalysisPredictor + Args: + model_dir (str): root path of __model__ and __params__ + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16/trt_int8) + use_dynamic_shape (bool): use dynamic shape or not + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + delete_shuffle_pass (bool): whether to remove shuffle_channel_detect_pass in TensorRT. + Used by action model. + Returns: + predictor (PaddlePredictor): AnalysisPredictor + Raises: + ValueError: predict by TensorRT need device == 'GPU'. 
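+
+    Example (a minimal sketch; the model directory is a placeholder assumption and
+    the TensorRT path requires a TensorRT-enabled Paddle build):
+
+        predictor, config = load_predictor('output_inference/picodet_s_320_coco_lcnet',
+                                           run_mode='trt_fp16',
+                                           device='GPU',
+                                           batch_size=1)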
+ """ + if device != 'GPU' and run_mode != 'paddle': + raise ValueError("Predict by TensorRT mode: {}, expect device=='GPU', but device == {}".format( + run_mode, device)) + config = Config(os.path.join(model_dir, 'model.pdmodel'), os.path.join(model_dir, 'model.pdiparams')) + if device == 'GPU': + # initial GPU memory(M), device ID + config.enable_use_gpu(200, 0) + # optimize graph and fuse op + config.switch_ir_optim(True) + elif device == 'XPU': + config.enable_lite_engine() + config.enable_xpu(10 * 1024 * 1024) + else: + config.disable_gpu() + config.set_cpu_math_library_num_threads(cpu_threads) + if enable_mkldnn: + try: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + if enable_mkldnn_bfloat16: + config.enable_mkldnn_bfloat16() + except Exception as e: + print("The current environment does not support `mkldnn`, so disable mkldnn.") + pass + + precision_map = { + 'trt_int8': Config.Precision.Int8, + 'trt_fp32': Config.Precision.Float32, + 'trt_fp16': Config.Precision.Half + } + if run_mode in precision_map.keys(): + config.enable_tensorrt_engine(workspace_size=(1 << 25) * batch_size, + max_batch_size=batch_size, + min_subgraph_size=min_subgraph_size, + precision_mode=precision_map[run_mode], + use_static=False, + use_calib_mode=trt_calib_mode) + + if use_dynamic_shape: + min_input_shape = {'image': [batch_size, 3, trt_min_shape, trt_min_shape]} + max_input_shape = {'image': [batch_size, 3, trt_max_shape, trt_max_shape]} + opt_input_shape = {'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]} + config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape) + print('trt set dynamic shape done!') + + # disable print log when predict + config.disable_glog_info() + # enable shared memory + config.enable_memory_optim() + # disable feed, fetch OP, needed by zero_copy_run + config.switch_use_feed_fetch_ops(False) + if delete_shuffle_pass: + config.delete_pass("shuffle_channel_detect_pass") + predictor = create_predictor(config) + return predictor, config + + +def get_test_images(infer_dir, infer_img): + """ + Get image path list in TEST mode + """ + assert infer_img is not None or infer_dir is not None, \ + "--image_file or --image_dir should be set" + assert infer_img is None or os.path.isfile(infer_img), \ + "{} is not a file".format(infer_img) + assert infer_dir is None or os.path.isdir(infer_dir), \ + "{} is not a directory".format(infer_dir) + + # infer_img has a higher priority + if infer_img and os.path.isfile(infer_img): + return [infer_img] + + images = set() + infer_dir = os.path.abspath(infer_dir) + assert os.path.isdir(infer_dir), \ + "infer_dir {} is not a directory".format(infer_dir) + exts = ['jpg', 'jpeg', 'png', 'bmp'] + exts += [ext.upper() for ext in exts] + for ext in exts: + images.update(glob.glob('{}/*.{}'.format(infer_dir, ext))) + images = list(images) + + assert len(images) > 0, "no image found in {}".format(infer_dir) + print("Found {} inference images in total.".format(len(images))) + + return images + + +def visualize(image_list, result, labels, output_dir='output/', threshold=0.5): + # visualize the predict result + start_idx = 0 + for idx, image_file in enumerate(image_list): + im_bboxes_num = result['boxes_num'][idx] + im_results = {} + if 'boxes' in result: + im_results['boxes'] = result['boxes'][start_idx:start_idx + im_bboxes_num, :] + if 'masks' in result: + im_results['masks'] = result['masks'][start_idx:start_idx + im_bboxes_num, :] + if 'segm' in 
result: + im_results['segm'] = result['segm'][start_idx:start_idx + im_bboxes_num, :] + if 'label' in result: + im_results['label'] = result['label'][start_idx:start_idx + im_bboxes_num] + if 'score' in result: + im_results['score'] = result['score'][start_idx:start_idx + im_bboxes_num] + + start_idx += im_bboxes_num + im = visualize_box(image_file, im_results, labels, threshold=threshold) + img_name = os.path.split(image_file)[-1] + if not os.path.exists(output_dir): + os.makedirs(output_dir) + out_path = os.path.join(output_dir, img_name) + im.save(out_path, quality=95) + print("save result to: " + out_path) + + +def print_arguments(args): + print('----------- Running Arguments -----------') + for arg, value in sorted(vars(args).items()): + print('%s: %s' % (arg, value)) + print('------------------------------------------') + + +def main(): + deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + arch = yml_conf['arch'] + detector_func = 'Detector' + if arch == 'SOLOv2': + detector_func = 'DetectorSOLOv2' + elif arch == 'PicoDet': + detector_func = 'DetectorPicoDet' + + detector = eval(detector_func)(FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn, + enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16, + threshold=FLAGS.threshold, + output_dir=FLAGS.output_dir) + + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + detector.predict_video(FLAGS.video_file, FLAGS.camera_id) + else: + # predict from image + if FLAGS.image_dir is None and FLAGS.image_file is not None: + assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None" + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + save_file = os.path.join(FLAGS.output_dir, 'results.json') if FLAGS.save_results else None + detector.predict_image(img_list, FLAGS.run_benchmark, repeats=100, save_file=save_file) + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + else: + mode = FLAGS.run_mode + model_dir = FLAGS.model_dir + model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} + bench_log(detector, img_list, model_info, name='DET') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" + assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device" + + assert not (FLAGS.enable_mkldnn == False and FLAGS.enable_mkldnn_bfloat16 + == True), 'To enable mkldnn bfloat, please turn on both enable_mkldnn and enable_mkldnn_bfloat16' + + main() diff --git a/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py new file mode 100644 index 000000000..e782ac1be --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py @@ -0,0 +1,381 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import glob +import math +import os +import sys +import time +from functools import reduce + +import cv2 +import numpy as np +import paddle +import yaml +from PIL import Image +# add deploy path of PadleDetection to sys.path +parent_path = os.path.abspath(os.path.join(__file__, *(['..']))) +sys.path.insert(0, parent_path) + +from preprocess import preprocess, NormalizeImage, Permute +from keypoint_preprocess import EvalAffine, TopDownEvalAffine, expand_crop +from keypoint_postprocess import HRNetPostProcess +from visualize import visualize_pose +from paddle.inference import Config +from paddle.inference import create_predictor +from utils import argsparser, Timer, get_current_memory_mb +from benchmark_utils import PaddleInferBenchmark +from infer import Detector, get_test_images, print_arguments + +# Global dictionary +KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'} + + +class KeyPointDetector(Detector): + """ + Args: + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(paddle/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + use_dark(bool): whether to use postprocess in DarkPose + """ + + def __init__(self, + model_dir, + device='CPU', + run_mode='paddle', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False, + output_dir='output', + threshold=0.5, + use_dark=True): + super(KeyPointDetector, self).__init__( + model_dir=model_dir, + device=device, + run_mode=run_mode, + batch_size=batch_size, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn, + output_dir=output_dir, + threshold=threshold, + ) + self.use_dark = use_dark + + def set_config(self, model_dir): + return PredictConfig_KeyPoint(model_dir) + + def get_person_from_rect(self, image, results): + # crop the person result from image + self.det_times.preprocess_time_s.start() + valid_rects = results['boxes'] + rect_images = [] + new_rects = [] + org_rects = [] + for rect in valid_rects: + rect_image, new_rect, org_rect = expand_crop(image, rect) + if rect_image is None or rect_image.size == 0: + continue + rect_images.append(rect_image) + new_rects.append(new_rect) + org_rects.append(org_rect) + self.det_times.preprocess_time_s.end() + return rect_images, new_rects, org_rects + + def postprocess(self, inputs, result): + np_heatmap = result['heatmap'] + np_masks = result['masks'] + # postprocess output of predictor + if 
KEYPOINT_SUPPORT_MODELS[self.pred_config.arch] == 'keypoint_bottomup': + results = {} + h, w = inputs['im_shape'][0] + preds = [np_heatmap] + if np_masks is not None: + preds += np_masks + preds += [h, w] + keypoint_postprocess = HRNetPostProcess() + kpts, scores = keypoint_postprocess(*preds) + results['keypoint'] = kpts + results['score'] = scores + return results + elif KEYPOINT_SUPPORT_MODELS[self.pred_config.arch] == 'keypoint_topdown': + results = {} + imshape = inputs['im_shape'][:, ::-1] + center = np.round(imshape / 2.) + scale = imshape / 200. + keypoint_postprocess = HRNetPostProcess(use_dark=self.use_dark) + kpts, scores = keypoint_postprocess(np_heatmap, center, scale) + results['keypoint'] = kpts + results['score'] = scores + return results + else: + raise ValueError("Unsupported arch: {}, expect {}".format(self.pred_config.arch, KEYPOINT_SUPPORT_MODELS)) + + def predict(self, repeats=1): + ''' + Args: + repeats (int): repeat number for prediction + Returns: + results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + MaskRCNN's results include 'masks': np.ndarray: + shape: [N, im_h, im_w] + ''' + # model prediction + np_heatmap, np_masks = None, None + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + heatmap_tensor = self.predictor.get_output_handle(output_names[0]) + np_heatmap = heatmap_tensor.copy_to_cpu() + if self.pred_config.tagmap: + masks_tensor = self.predictor.get_output_handle(output_names[1]) + heat_k = self.predictor.get_output_handle(output_names[2]) + inds_k = self.predictor.get_output_handle(output_names[3]) + np_masks = [masks_tensor.copy_to_cpu(), heat_k.copy_to_cpu(), inds_k.copy_to_cpu()] + result = dict(heatmap=np_heatmap, masks=np_masks) + return result + + def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True): + results = [] + batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size) + for i in range(batch_loop_cnt): + start_index = i * self.batch_size + end_index = min((i + 1) * self.batch_size, len(image_list)) + batch_image_list = image_list[start_index:end_index] + if run_benchmark: + # preprocess + inputs = self.preprocess(batch_image_list) # warmup + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + result_warmup = self.predict(repeats=repeats) # warmup + self.det_times.inference_time_s.start() + result = self.predict(repeats=repeats) + self.det_times.inference_time_s.end(repeats=repeats) + + # postprocess + result_warmup = self.postprocess(inputs, result) # warmup + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + cm, gm, gu = get_current_memory_mb() + self.cpu_mem += cm + self.gpu_mem += gm + self.gpu_util += gu + + else: + # preprocess + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + # postprocess + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + if visual: + if not os.path.exists(self.output_dir): + 
os.makedirs(self.output_dir) + visualize(batch_image_list, result, visual_thresh=self.threshold, save_dir=self.output_dir) + + results.append(result) + if visual: + print('Test iter {}'.format(i)) + results = self.merge_batch_result(results) + return results + + def predict_video(self, video_file, camera_id): + video_name = 'output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = cv2.VideoCapture(video_file) + video_name = os.path.split(video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + out_path = os.path.join(self.output_dir, video_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + index = 1 + while (1): + ret, frame = capture.read() + if not ret: + break + print('detect frame: %d' % (index)) + index += 1 + results = self.predict_image([frame[:, :, ::-1]], visual=False) + im_results = {} + im_results['keypoint'] = [results['keypoint'], results['score']] + im = visualize_pose(frame, im_results, visual_thresh=self.threshold, returnimg=True) + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + writer.release() + + +def create_inputs(imgs, im_info): + """generate input for different model type + Args: + imgs (list(numpy)): list of image (np.ndarray) + im_info (list(dict)): list of image info + Returns: + inputs (dict): input of model + """ + inputs = {} + inputs['image'] = np.stack(imgs, axis=0).astype('float32') + im_shape = [] + for e in im_info: + im_shape.append(np.array((e['im_shape'])).astype('float32')) + inputs['im_shape'] = np.stack(im_shape, axis=0) + return inputs + + +class PredictConfig_KeyPoint(): + """set config of preprocess, postprocess and visualize + Args: + model_dir (str): root path of model.yml + """ + + def __init__(self, model_dir): + # parsing Yaml config for Preprocess + deploy_file = os.path.join(model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + self.check_model(yml_conf) + self.arch = yml_conf['arch'] + self.archcls = KEYPOINT_SUPPORT_MODELS[yml_conf['arch']] + self.preprocess_infos = yml_conf['Preprocess'] + self.min_subgraph_size = yml_conf['min_subgraph_size'] + self.labels = yml_conf['label_list'] + self.tagmap = False + self.use_dynamic_shape = yml_conf['use_dynamic_shape'] + if 'keypoint_bottomup' == self.archcls: + self.tagmap = True + self.print_config() + + def check_model(self, yml_conf): + """ + Raises: + ValueError: loaded model not in supported model type + """ + for support_model in KEYPOINT_SUPPORT_MODELS: + if support_model in yml_conf['arch']: + return True + raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf['arch'], KEYPOINT_SUPPORT_MODELS)) + + def print_config(self): + print('----------- Model Configuration -----------') + print('%s: %s' % ('Model Arch', self.arch)) + print('%s: ' % ('Transform Order')) + for op_info in self.preprocess_infos: + print('--%s: %s' % ('transform op', op_info['type'])) + print('--------------------------------------------') + + +def visualize(image_list, results, visual_thresh=0.6, save_dir='output'): + im_results = {} + for i, 
image_file in enumerate(image_list): + skeletons = results['keypoint'] + scores = results['score'] + skeleton = skeletons[i:i + 1] + score = scores[i:i + 1] + im_results['keypoint'] = [skeleton, score] + visualize_pose(image_file, im_results, visual_thresh=visual_thresh, save_dir=save_dir) + + +def main(): + detector = KeyPointDetector(FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn, + threshold=FLAGS.threshold, + output_dir=FLAGS.output_dir, + use_dark=FLAGS.use_dark) + + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + detector.predict_video(FLAGS.video_file, FLAGS.camera_id) + else: + # predict from image + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10) + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + else: + mems = { + 'cpu_rss_mb': detector.cpu_mem / len(img_list), + 'gpu_rss_mb': detector.gpu_mem / len(img_list), + 'gpu_util': detector.gpu_util * 100 / len(img_list) + } + perf_info = detector.det_times.report(average=True) + model_dir = FLAGS.model_dir + mode = FLAGS.run_mode + model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} + data_info = {'batch_size': 1, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']} + det_log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems) + det_log('KeyPoint') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" + assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device" + + main() diff --git a/modules/image/keypoint_detection/pp-tinypose/keypoint_postprocess.py b/modules/image/keypoint_detection/pp-tinypose/keypoint_postprocess.py new file mode 100644 index 000000000..64d479f61 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/keypoint_postprocess.py @@ -0,0 +1,192 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
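+# Usage sketch for the post-processing below (shapes are illustrative, not
+# prescribed by this module): given top-down heatmaps of shape
+# [batch, num_joints, 64, 48] plus the per-box centers and scales used for
+# cropping,
+#
+#     post = HRNetPostProcess(use_dark=True)
+#     keypoints, scores = post(np_heatmap, center, scale)
+#
+# returns keypoints of shape [batch, num_joints, 3] as [x, y, confidence] and a
+# per-instance mean confidence.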
+import math +from collections import abc +from collections import defaultdict + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn +from keypoint_preprocess import get_affine_mat_kernel +from keypoint_preprocess import get_affine_transform +from scipy.optimize import linear_sum_assignment + + +class HRNetPostProcess(object): + + def __init__(self, use_dark=True): + self.use_dark = use_dark + + def flip_back(self, output_flipped, matched_parts): + assert output_flipped.ndim == 4,\ + 'output_flipped should be [batch_size, num_joints, height, width]' + + output_flipped = output_flipped[:, :, :, ::-1] + + for pair in matched_parts: + tmp = output_flipped[:, pair[0], :, :].copy() + output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] + output_flipped[:, pair[1], :, :] = tmp + + return output_flipped + + def get_max_preds(self, heatmaps): + """get predictions from score maps + + Args: + heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) + + Returns: + preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords + maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints + """ + assert isinstance(heatmaps, np.ndarray), 'heatmaps should be numpy.ndarray' + assert heatmaps.ndim == 4, 'batch_images should be 4-ndim' + + batch_size = heatmaps.shape[0] + num_joints = heatmaps.shape[1] + width = heatmaps.shape[3] + heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1)) + idx = np.argmax(heatmaps_reshaped, 2) + maxvals = np.amax(heatmaps_reshaped, 2) + + maxvals = maxvals.reshape((batch_size, num_joints, 1)) + idx = idx.reshape((batch_size, num_joints, 1)) + + preds = np.tile(idx, (1, 1, 2)).astype(np.float32) + + preds[:, :, 0] = (preds[:, :, 0]) % width + preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) + + pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) + pred_mask = pred_mask.astype(np.float32) + + preds *= pred_mask + + return preds, maxvals + + def gaussian_blur(self, heatmap, kernel): + border = (kernel - 1) // 2 + batch_size = heatmap.shape[0] + num_joints = heatmap.shape[1] + height = heatmap.shape[2] + width = heatmap.shape[3] + for i in range(batch_size): + for j in range(num_joints): + origin_max = np.max(heatmap[i, j]) + dr = np.zeros((height + 2 * border, width + 2 * border)) + dr[border:-border, border:-border] = heatmap[i, j].copy() + dr = cv2.GaussianBlur(dr, (kernel, kernel), 0) + heatmap[i, j] = dr[border:-border, border:-border].copy() + heatmap[i, j] *= origin_max / np.max(heatmap[i, j]) + return heatmap + + def dark_parse(self, hm, coord): + heatmap_height = hm.shape[0] + heatmap_width = hm.shape[1] + px = int(coord[0]) + py = int(coord[1]) + if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2: + dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1]) + dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px]) + dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2]) + dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \ + + hm[py-1][px-1]) + dyy = 0.25 * (hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px]) + derivative = np.matrix([[dx], [dy]]) + hessian = np.matrix([[dxx, dxy], [dxy, dyy]]) + if dxx * dyy - dxy**2 != 0: + hessianinv = hessian.I + offset = -hessianinv * derivative + offset = np.squeeze(np.array(offset.T), axis=0) + coord += offset + return coord + + def dark_postprocess(self, hm, coords, kernelsize): + """ + refer to https://github.com/ilovepose/DarkPose/lib/core/inference.py + + """ + hm = self.gaussian_blur(hm, kernelsize) + hm = 
np.maximum(hm, 1e-10) + hm = np.log(hm) + for n in range(coords.shape[0]): + for p in range(coords.shape[1]): + coords[n, p] = self.dark_parse(hm[n][p], coords[n][p]) + return coords + + def get_final_preds(self, heatmaps, center, scale, kernelsize=3): + """the highest heatvalue location with a quarter offset in the + direction from the highest response to the second highest response. + + Args: + heatmaps (numpy.ndarray): The predicted heatmaps + center (numpy.ndarray): The boxes center + scale (numpy.ndarray): The scale factor + + Returns: + preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords + maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints + """ + + coords, maxvals = self.get_max_preds(heatmaps) + + heatmap_height = heatmaps.shape[2] + heatmap_width = heatmaps.shape[3] + + if self.use_dark: + coords = self.dark_postprocess(heatmaps, coords, kernelsize) + else: + for n in range(coords.shape[0]): + for p in range(coords.shape[1]): + hm = heatmaps[n][p] + px = int(math.floor(coords[n][p][0] + 0.5)) + py = int(math.floor(coords[n][p][1] + 0.5)) + if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: + diff = np.array([hm[py][px + 1] - hm[py][px - 1], hm[py + 1][px] - hm[py - 1][px]]) + coords[n][p] += np.sign(diff) * .25 + preds = coords.copy() + + # Transform back + for i in range(coords.shape[0]): + preds[i] = transform_preds(coords[i], center[i], scale[i], [heatmap_width, heatmap_height]) + + return preds, maxvals + + def __call__(self, output, center, scale): + preds, maxvals = self.get_final_preds(output, center, scale) + return np.concatenate((preds, maxvals), axis=-1), np.mean(maxvals, axis=1) + + +def transform_preds(coords, center, scale, output_size): + target_coords = np.zeros(coords.shape) + trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1) + for p in range(coords.shape[0]): + target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) + return target_coords + + +def affine_transform(pt, t): + new_pt = np.array([pt[0], pt[1], 1.]).T + new_pt = np.dot(t, new_pt) + return new_pt[:2] + + +def translate_to_ori_images(keypoint_result, batch_records): + kpts = keypoint_result['keypoint'] + scores = keypoint_result['score'] + kpts[..., 0] += batch_records[:, 0:1] + kpts[..., 1] += batch_records[:, 1:2] + return kpts, scores diff --git a/modules/image/keypoint_detection/pp-tinypose/keypoint_preprocess.py b/modules/image/keypoint_detection/pp-tinypose/keypoint_preprocess.py new file mode 100644 index 000000000..9e4eb3fd4 --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/keypoint_preprocess.py @@ -0,0 +1,232 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
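+# Usage sketch for the transforms below (all sizes are illustrative): to crop
+# and resize a detected person region to a 192x256 network input,
+#
+#     trans = get_affine_transform(center=np.array([320., 240.]),
+#                                  input_size=np.array([200., 267.]),
+#                                  rot=0, output_size=[192, 256])
+#     patch = cv2.warpAffine(image, trans, (192, 256), flags=cv2.INTER_LINEAR)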
+""" +this code is based on https://github.com/open-mmlab/mmpose/mmpose/core/post_processing/post_transforms.py +""" +import cv2 +import numpy as np + + +class EvalAffine(object): + + def __init__(self, size, stride=64): + super(EvalAffine, self).__init__() + self.size = size + self.stride = stride + + def __call__(self, image, im_info): + s = self.size + h, w, _ = image.shape + trans, size_resized = get_affine_mat_kernel(h, w, s, inv=False) + image_resized = cv2.warpAffine(image, trans, size_resized) + return image_resized, im_info + + +def get_affine_mat_kernel(h, w, s, inv=False): + if w < h: + w_ = s + h_ = int(np.ceil((s / w * h) / 64.) * 64) + scale_w = w + scale_h = h_ / w_ * w + + else: + h_ = s + w_ = int(np.ceil((s / h * w) / 64.) * 64) + scale_h = h + scale_w = w_ / h_ * h + + center = np.array([np.round(w / 2.), np.round(h / 2.)]) + + size_resized = (w_, h_) + trans = get_affine_transform(center, np.array([scale_w, scale_h]), 0, size_resized, inv=inv) + + return trans, size_resized + + +def get_affine_transform(center, input_size, rot, output_size, shift=(0., 0.), inv=False): + """Get the affine transform matrix, given the center/scale/rot/output_size. + + Args: + center (np.ndarray[2, ]): Center of the bounding box (x, y). + scale (np.ndarray[2, ]): Scale of the bounding box + wrt [width, height]. + rot (float): Rotation angle (degree). + output_size (np.ndarray[2, ]): Size of the destination heatmaps. + shift (0-100%): Shift translation ratio wrt the width/height. + Default (0., 0.). + inv (bool): Option to inverse the affine transform direction. + (inv=False: src->dst or inv=True: dst->src) + + Returns: + np.ndarray: The transform matrix. + """ + assert len(center) == 2 + assert len(output_size) == 2 + assert len(shift) == 2 + if not isinstance(input_size, (np.ndarray, list)): + input_size = np.array([input_size, input_size], dtype=np.float32) + scale_tmp = input_size + + shift = np.array(shift) + src_w = scale_tmp[0] + dst_w = output_size[0] + dst_h = output_size[1] + + rot_rad = np.pi * rot / 180 + src_dir = rotate_point([0., src_w * -0.5], rot_rad) + dst_dir = np.array([0., dst_w * -0.5]) + + src = np.zeros((3, 2), dtype=np.float32) + src[0, :] = center + scale_tmp * shift + src[1, :] = center + src_dir + scale_tmp * shift + src[2, :] = _get_3rd_point(src[0, :], src[1, :]) + + dst = np.zeros((3, 2), dtype=np.float32) + dst[0, :] = [dst_w * 0.5, dst_h * 0.5] + dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir + dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :]) + + if inv: + trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) + else: + trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) + + return trans + + +def get_warp_matrix(theta, size_input, size_dst, size_target): + """This code is based on + https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py + + Calculate the transformation matrix under the constraint of unbiased. + Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased + Data Processing for Human Pose Estimation (CVPR 2020). + + Args: + theta (float): Rotation angle in degrees. + size_input (np.ndarray): Size of input image [w, h]. + size_dst (np.ndarray): Size of output image [w, h]. + size_target (np.ndarray): Size of ROI in input plane [w, h]. + + Returns: + matrix (np.ndarray): A matrix for transformation. 
+ """ + theta = np.deg2rad(theta) + matrix = np.zeros((2, 3), dtype=np.float32) + scale_x = size_dst[0] / size_target[0] + scale_y = size_dst[1] / size_target[1] + matrix[0, 0] = np.cos(theta) * scale_x + matrix[0, 1] = -np.sin(theta) * scale_x + matrix[0, 2] = scale_x * (-0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] * np.sin(theta) + + 0.5 * size_target[0]) + matrix[1, 0] = np.sin(theta) * scale_y + matrix[1, 1] = np.cos(theta) * scale_y + matrix[1, 2] = scale_y * (-0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] * np.cos(theta) + + 0.5 * size_target[1]) + return matrix + + +def rotate_point(pt, angle_rad): + """Rotate a point by an angle. + + Args: + pt (list[float]): 2 dimensional point to be rotated + angle_rad (float): rotation angle by radian + + Returns: + list[float]: Rotated point. + """ + assert len(pt) == 2 + sn, cs = np.sin(angle_rad), np.cos(angle_rad) + new_x = pt[0] * cs - pt[1] * sn + new_y = pt[0] * sn + pt[1] * cs + rotated_pt = [new_x, new_y] + + return rotated_pt + + +def _get_3rd_point(a, b): + """To calculate the affine matrix, three pairs of points are required. This + function is used to get the 3rd point, given 2D points a & b. + + The 3rd point is defined by rotating vector `a - b` by 90 degrees + anticlockwise, using b as the rotation center. + + Args: + a (np.ndarray): point(x,y) + b (np.ndarray): point(x,y) + + Returns: + np.ndarray: The 3rd point. + """ + assert len(a) == 2 + assert len(b) == 2 + direction = a - b + third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32) + + return third_pt + + +class TopDownEvalAffine(object): + """apply affine transform to image and coords + + Args: + trainsize (list): [w, h], the standard size used to train + use_udp (bool): whether to use Unbiased Data Processing. + records(dict): the dict contained the image and coords + + Returns: + records (dict): contain the image and coords after tranformed + + """ + + def __init__(self, trainsize, use_udp=False): + self.trainsize = trainsize + self.use_udp = use_udp + + def __call__(self, image, im_info): + rot = 0 + imshape = im_info['im_shape'][::-1] + center = im_info['center'] if 'center' in im_info else imshape / 2. + scale = im_info['scale'] if 'scale' in im_info else imshape + if self.use_udp: + trans = get_warp_matrix(rot, center * 2.0, [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale) + image = cv2.warpAffine(image, + trans, (int(self.trainsize[0]), int(self.trainsize[1])), + flags=cv2.INTER_LINEAR) + else: + trans = get_affine_transform(center, scale, rot, self.trainsize) + image = cv2.warpAffine(image, + trans, (int(self.trainsize[0]), int(self.trainsize[1])), + flags=cv2.INTER_LINEAR) + + return image, im_info + + +def expand_crop(images, rect, expand_ratio=0.3): + imgh, imgw, c = images.shape + label, conf, xmin, ymin, xmax, ymax = [int(x) for x in rect.tolist()] + if label != 0: + return None, None, None + org_rect = [xmin, ymin, xmax, ymax] + h_half = (ymax - ymin) * (1 + expand_ratio) / 2. + w_half = (xmax - xmin) * (1 + expand_ratio) / 2. + if h_half > w_half * 4 / 3: + w_half = h_half * 0.75 + center = [(ymin + ymax) / 2., (xmin + xmax) / 2.] 
+ ymin = max(0, int(center[0] - h_half)) + ymax = min(imgh - 1, int(center[0] + h_half)) + xmin = max(0, int(center[1] - w_half)) + xmax = min(imgw - 1, int(center[1] + w_half)) + return images[ymin:ymax, xmin:xmax, :], [xmin, ymin, xmax, ymax], org_rect diff --git a/modules/image/keypoint_detection/pp-tinypose/logger.py b/modules/image/keypoint_detection/pp-tinypose/logger.py new file mode 100644 index 000000000..f7a5c5bea --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/logger.py @@ -0,0 +1,68 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import functools +import logging +import os +import sys + +import paddle.distributed as dist + +__all__ = ['setup_logger'] + +logger_initialized = [] + + +def setup_logger(name="ppdet", output=None): + """ + Initialize logger and set its verbosity level to INFO. + Args: + output (str): a file name or a directory to save log. If None, will not save log file. + If ends with ".txt" or ".log", assumed to be a file name. + Otherwise, logs will be saved to `output/log.txt`. + name (str): the root module name of this logger + + Returns: + logging.Logger: a logger + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + + logger.setLevel(logging.INFO) + logger.propagate = False + + formatter = logging.Formatter("[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S") + # stdout logging: master only + local_rank = dist.get_rank() + if local_rank == 0: + ch = logging.StreamHandler(stream=sys.stdout) + ch.setLevel(logging.DEBUG) + ch.setFormatter(formatter) + logger.addHandler(ch) + + # file logging: all workers + if output is not None: + if output.endswith(".txt") or output.endswith(".log"): + filename = output + else: + filename = os.path.join(output, "log.txt") + if local_rank > 0: + filename = filename + ".rank{}".format(local_rank) + os.makedirs(os.path.dirname(filename)) + fh = logging.FileHandler(filename, mode='a') + fh.setLevel(logging.DEBUG) + fh.setFormatter(logging.Formatter()) + logger.addHandler(fh) + logger_initialized.append(name) + return logger diff --git a/modules/image/keypoint_detection/pp-tinypose/module.py b/modules/image/keypoint_detection/pp-tinypose/module.py new file mode 100644 index 000000000..4c9e920ee --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/module.py @@ -0,0 +1,148 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import json +import math +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import yaml +from det_keypoint_unite_infer import predict_with_given_det +from infer import bench_log +from infer import Detector +from infer import get_test_images +from infer import PredictConfig +from infer import print_arguments +from keypoint_infer import KeyPointDetector +from keypoint_infer import PredictConfig_KeyPoint +from keypoint_postprocess import translate_to_ori_images +from preprocess import base64_to_cv2 +from preprocess import decode_image +from visualize import visualize_pose + +import paddlehub.vision.transforms as T +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="pp-tinypose", + type="CV/image_editing", + author="paddlepaddle", + author_email="", + summary="Openpose_body_estimation is a body pose estimation model based on Realtime Multi-Person 2D Pose \ + Estimation using Part Affinity Fields.", + version="1.0.0") +class PP_TinyPose: + """ + PP-TinyPose Model. + + Args: + load_checkpoint(str): Checkpoint save path, default is None. + """ + + def __init__(self): + self.det_model_dir = os.path.join(self.directory, 'model/picodet_s_320_coco_lcnet/') + self.keypoint_model_dir = os.path.join(self.directory, 'model/dark_hrnet_w32_256x192/') + self.detector = Detector(self.det_model_dir) + self.topdown_keypoint_detector = KeyPointDetector(self.keypoint_model_dir) + + def predict(self, + img: Union[str, np.ndarray], + save_path: str = "pp_tinypose_output", + visualization: bool = False, + use_gpu=False): + if use_gpu: + device = 'GPU' + else: + device = 'CPU' + if self.detector.device != device: + self.detector = Detector(self.det_model_dir, device=device) + self.topdown_keypoint_detector = KeyPointDetector(self.keypoint_model_dir, device=device) + + self.visualization = visualization + store_res = [] + + # Decode image in advance in det + pose prediction + image, _ = decode_image(img, {}) + results = self.detector.predict_image([image], visual=False) + results = self.detector.filter_box(results, 0.5) + if results['boxes_num'] > 0: + keypoint_res = predict_with_given_det(image, results, self.topdown_keypoint_detector, 1, False) + save_name = img if isinstance(img, str) else (str(time.time()) + '.png') + store_res.append( + [save_name, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]]) + if not os.path.exists(save_path): + os.makedirs(save_path) + if self.visualization: + visualize_pose(save_name, keypoint_res, visual_thresh=0.5, save_dir=save_path) + return store_res + + @serving + def serving_method(self, images: list, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.predict(img=images_decode[0], **kwargs) + results = json.dumps(results) + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.predict(img=args.input_path, + save_path=args.output_dir, + visualization=args.visualization, + use_gpu=args.use_gpu) + + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='pp_tinypose_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=bool, + default=True, + help="whether to save output as images.") + + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") diff --git a/modules/image/keypoint_detection/pp-tinypose/preprocess.py b/modules/image/keypoint_detection/pp-tinypose/preprocess.py new file mode 100644 index 000000000..a0d44c45d --- /dev/null +++ b/modules/image/keypoint_detection/pp-tinypose/preprocess.py @@ -0,0 +1,332 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import base64 + +import cv2 +import numpy as np +from keypoint_preprocess import get_affine_transform + + +def decode_image(im_file, im_info): + """read rgb image + Args: + im_file (str|np.ndarray): input can be image path or np.ndarray + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = cv2.cvtColor(im_file, cv2.COLOR_BGR2RGB) + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) + return im, im_info + + +class Resize(object): + """resize image by target_size and max_size + Args: + target_size (int): the target size of image + keep_ratio (bool): whether keep_ratio or not, default true + interp (int): method of resize + """ + + def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + im_info['scale_factor'] = np.array([im_scale_y, im_scale_x]).astype('float32') + return im, im_info + + def generate_scale(self, im): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +class NormalizeImage(object): + """normalize image + Args: + mean (list): im - mean + std (list): im / std + is_scale (bool): whether need im / 255 + is_channel_first (bool): if True: image shape is CHW, else: HWC + """ + + def __init__(self, mean, std, is_scale=True): + self.mean = mean + self.std = std + self.is_scale = is_scale + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.astype(np.float32, copy=False) + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + + if self.is_scale: + im = im / 255.0 + im -= mean + im /= std + return im, 
im_info + + +class Permute(object): + """permute image + Args: + to_bgr (bool): whether convert RGB to BGR + channel_first (bool): whether convert HWC to CHW + """ + + def __init__(self, ): + super(Permute, self).__init__() + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.transpose((2, 0, 1)).copy() + return im, im_info + + +class PadStride(object): + """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config + Args: + stride (bool): model with FPN need image shape % stride == 0 + """ + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + coarsest_stride = self.coarsest_stride + if coarsest_stride <= 0: + return im, im_info + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +class LetterBoxResize(object): + + def __init__(self, target_size): + """ + Resize image to target size, convert normalized xywh to pixel xyxy + format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]). + Args: + target_size (int|list): image target size. + """ + super(LetterBoxResize, self).__init__() + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + + def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)): + # letterbox: resize a rectangular image to a padded rectangular + shape = img.shape[:2] # [height, width] + ratio_h = float(height) / shape[0] + ratio_w = float(width) / shape[1] + ratio = min(ratio_h, ratio_w) + new_shape = (round(shape[1] * ratio), round(shape[0] * ratio)) # [width, height] + padw = (width - new_shape[0]) / 2 + padh = (height - new_shape[1]) / 2 + top, bottom = round(padh - 0.1), round(padh + 0.1) + left, right = round(padw - 0.1), round(padw + 0.1) + + img = cv2.resize(img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border + img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # padded rectangular + return img, ratio, padw, padh + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + height, width = self.target_size + h, w = im.shape[:2] + im, ratio, padw, padh = self.letterbox(im, height=height, width=width) + + new_shape = [round(h * ratio), round(w * ratio)] + im_info['im_shape'] = np.array(new_shape, dtype=np.float32) + im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32) + return im, im_info + + +class Pad(object): + + def __init__(self, size, fill_value=[114.0, 114.0, 114.0]): + """ + Pad image to a specified size. 
+        Args:
+            size (list[int]): image target size
+            fill_value (list[float]): RGB value of the padded area, default (114.0, 114.0, 114.0)
+        """
+        super(Pad, self).__init__()
+        if isinstance(size, int):
+            size = [size, size]
+        self.size = size
+        self.fill_value = fill_value
+
+    def __call__(self, im, im_info):
+        im_h, im_w = im.shape[:2]
+        h, w = self.size
+        if h == im_h and w == im_w:
+            im = im.astype(np.float32)
+            return im, im_info
+
+        canvas = np.ones((h, w, 3), dtype=np.float32)
+        canvas *= np.array(self.fill_value, dtype=np.float32)
+        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
+        im = canvas
+        return im, im_info
+
+
+class WarpAffine(object):
+    """Warp the image with an affine transform to the network input size
+    """
+
+    def __init__(self, keep_res=False, pad=31, input_h=512, input_w=512, scale=0.4, shift=0.1):
+        self.keep_res = keep_res
+        self.pad = pad
+        self.input_h = input_h
+        self.input_w = input_w
+        self.scale = scale
+        self.shift = shift
+
+    def __call__(self, im, im_info):
+        """
+        Args:
+            im (np.ndarray): image (np.ndarray)
+            im_info (dict): info of image
+        Returns:
+            im (np.ndarray): processed image (np.ndarray)
+            im_info (dict): info of processed image
+        """
+        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
+
+        h, w = img.shape[:2]
+
+        if self.keep_res:
+            input_h = (h | self.pad) + 1
+            input_w = (w | self.pad) + 1
+            s = np.array([input_w, input_h], dtype=np.float32)
+            c = np.array([w // 2, h // 2], dtype=np.float32)
+
+        else:
+            s = max(h, w) * 1.0
+            input_h, input_w = self.input_h, self.input_w
+            c = np.array([w / 2., h / 2.], dtype=np.float32)
+
+        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
+        img = cv2.resize(img, (w, h))
+        inp = cv2.warpAffine(img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
+        return inp, im_info
+
+
+def preprocess(im, preprocess_ops):
+    # decode the image, then apply each preprocess op in order
+    im_info = {
+        'scale_factor': np.array([1., 1.], dtype=np.float32),
+        'im_shape': None,
+    }
+    im, im_info = decode_image(im, im_info)
+    for operator in preprocess_ops:
+        im, im_info = operator(im, im_info)
+    return im, im_info
+
+
+def cv2_to_base64(image: np.ndarray):
+    data = cv2.imencode('.jpg', image)[1]
+    return base64.b64encode(data.tobytes()).decode('utf8')
+
+
+def base64_to_cv2(b64str: str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
diff --git a/modules/image/keypoint_detection/pp-tinypose/utils.py b/modules/image/keypoint_detection/pp-tinypose/utils.py
new file mode 100644
index 000000000..4e0b46b77
--- /dev/null
+++ b/modules/image/keypoint_detection/pp-tinypose/utils.py
@@ -0,0 +1,217 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
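+# A minimal usage sketch of the helpers defined below (illustrative only; the
+# '--model_dir' value is a placeholder, not a path shipped with this module):
+#
+#   parser = argsparser()
+#   args = parser.parse_args(['--model_dir', 'inference_model/pp-tinypose'])
+#   timer = Timer()
+#   timer.inference_time_s.start()
+#   # ... run the predictor here ...
+#   timer.inference_time_s.end()
+#   timer.img_num += 1
+#   timer.info(average=True)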
+import argparse +import ast +import os +import time + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py."), + required=True) + parser.add_argument("--image_file", type=str, default=None, help="Path of image file.") + parser.add_argument("--image_dir", + type=str, + default=None, + help="Dir of image file, `image_file` has a higher priority.") + parser.add_argument("--batch_size", type=int, default=1, help="batch_size for inference.") + parser.add_argument("--video_file", + type=str, + default=None, + help="Path of video file, `video_file` or `camera_id` has a highest priority.") + parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.") + parser.add_argument("--threshold", type=float, default=0.5, help="Threshold of score.") + parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.") + parser.add_argument("--run_mode", + type=str, + default='paddle', + help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") + parser.add_argument("--device", + type=str, + default='cpu', + help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.") + parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Deprecated, please use `--device`.") + parser.add_argument("--run_benchmark", + type=ast.literal_eval, + default=False, + help="Whether to predict a image_file repeatedly for benchmark") + parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.") + parser.add_argument("--enable_mkldnn_bfloat16", + type=ast.literal_eval, + default=False, + help="Whether use mkldnn bfloat16 inference with CPU.") + parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.") + parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.") + parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.") + parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.") + parser.add_argument("--trt_calib_mode", + type=bool, + default=False, + help="If the model is produced by TRT offline quantitative " + "calibration, trt_calib_mode need to set True.") + parser.add_argument('--save_images', action='store_true', help='Save visualization image results.') + parser.add_argument('--save_mot_txts', action='store_true', help='Save tracking results (txt).') + parser.add_argument('--save_mot_txt_per_img', + action='store_true', + help='Save tracking results (txt) for each image.') + parser.add_argument('--scaled', + type=bool, + default=False, + help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 " + "True in general detector.") + parser.add_argument("--tracker_config", type=str, default=None, help=("tracker donfig")) + parser.add_argument("--reid_model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py.")) + parser.add_argument("--reid_batch_size", type=int, default=50, help="max batch_size for reid model inference.") + parser.add_argument('--use_dark', + type=ast.literal_eval, + default=True, + help='whether to use darkpose to get better keypoint position predict ') + 
parser.add_argument("--action_file", type=str, default=None, help="Path of input file for action recognition.") + parser.add_argument("--window_size", + type=int, + default=50, + help="Temporal size of skeleton feature for action recognition.") + parser.add_argument("--random_pad", + type=ast.literal_eval, + default=False, + help="Whether do random padding for action recognition.") + parser.add_argument("--save_results", + type=bool, + default=False, + help="Whether save detection result to file using coco format") + + return parser + + +class Times(object): + + def __init__(self): + self.time = 0. + # start time + self.st = 0. + # end time + self.et = 0. + + def start(self): + self.st = time.time() + + def end(self, repeats=1, accumulative=True): + self.et = time.time() + if accumulative: + self.time += (self.et - self.st) / repeats + else: + self.time = (self.et - self.st) / repeats + + def reset(self): + self.time = 0. + self.st = 0. + self.et = 0. + + def value(self): + return round(self.time, 4) + + +class Timer(Times): + + def __init__(self, with_tracker=False): + super(Timer, self).__init__() + self.with_tracker = with_tracker + self.preprocess_time_s = Times() + self.inference_time_s = Times() + self.postprocess_time_s = Times() + self.tracking_time_s = Times() + self.img_num = 0 + + def info(self, average=False): + pre_time = self.preprocess_time_s.value() + infer_time = self.inference_time_s.value() + post_time = self.postprocess_time_s.value() + track_time = self.tracking_time_s.value() + + total_time = pre_time + infer_time + post_time + if self.with_tracker: + total_time = total_time + track_time + total_time = round(total_time, 4) + print("------------------ Inference Time Info ----------------------") + print("total_time(ms): {}, img_num: {}".format(total_time * 1000, self.img_num)) + preprocess_time = round(pre_time / max(1, self.img_num), 4) if average else pre_time + postprocess_time = round(post_time / max(1, self.img_num), 4) if average else post_time + inference_time = round(infer_time / max(1, self.img_num), 4) if average else infer_time + tracking_time = round(track_time / max(1, self.img_num), 4) if average else track_time + + average_latency = total_time / max(1, self.img_num) + qps = 0 + if total_time > 0: + qps = 1 / average_latency + print("average latency time(ms): {:.2f}, QPS: {:2f}".format(average_latency * 1000, qps)) + if self.with_tracker: + print( + "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}, tracking_time(ms): {:.2f}" + .format(preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000, tracking_time * 1000)) + else: + print("preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".format( + preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000)) + + def report(self, average=False): + dic = {} + pre_time = self.preprocess_time_s.value() + infer_time = self.inference_time_s.value() + post_time = self.postprocess_time_s.value() + track_time = self.tracking_time_s.value() + + dic['preprocess_time_s'] = round(pre_time / max(1, self.img_num), 4) if average else pre_time + dic['inference_time_s'] = round(infer_time / max(1, self.img_num), 4) if average else infer_time + dic['postprocess_time_s'] = round(post_time / max(1, self.img_num), 4) if average else post_time + dic['img_num'] = self.img_num + total_time = pre_time + infer_time + post_time + if self.with_tracker: + dic['tracking_time_s'] = round(track_time / max(1, self.img_num), 4) if average else 
track_time
+            total_time = total_time + track_time
+        dic['total_time_s'] = round(total_time, 4)
+        return dic
+
+
+def get_current_memory_mb():
+    """
+    Obtain the CPU and GPU memory usage of the currently running program.
+    Note that calling this function is itself time-consuming.
+    """
+    import pynvml
+    import psutil
+    import GPUtil
+    gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
+
+    pid = os.getpid()
+    p = psutil.Process(pid)
+    info = p.memory_full_info()
+    cpu_mem = info.uss / 1024. / 1024.
+    gpu_mem = 0
+    gpu_percent = 0
+    gpus = GPUtil.getGPUs()
+    if gpu_id is not None and len(gpus) > 0:
+        gpu_percent = gpus[gpu_id].load
+        pynvml.nvmlInit()
+        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        gpu_mem = meminfo.used / 1024. / 1024.
+    return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
diff --git a/modules/image/keypoint_detection/pp-tinypose/visualize.py b/modules/image/keypoint_detection/pp-tinypose/visualize.py
new file mode 100644
index 000000000..18da3cbf6
--- /dev/null
+++ b/modules/image/keypoint_detection/pp-tinypose/visualize.py
@@ -0,0 +1,208 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import division
+
+import os
+
+import cv2
+import numpy as np
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFile
+
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+import math
+
+
+def visualize_box(im, results, labels, threshold=0.5):
+    """
+    Args:
+        im (str/np.ndarray): path of image or np.ndarray read by cv2
+        results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
+                        matrix element:[class, score, x_min, y_min, x_max, y_max]
+                        MaskRCNN's results include 'masks': np.ndarray:
+                        shape:[N, im_h, im_w]
+        labels (list): labels:['class1', ..., 'classn']
+        threshold (float): Threshold of score.
+ Returns: + im (PIL.Image.Image): visualized image + """ + if isinstance(im, str): + im = Image.open(im).convert('RGB') + elif isinstance(im, np.ndarray): + im = Image.fromarray(im) + if 'boxes' in results and len(results['boxes']) > 0: + im = draw_box(im, results['boxes'], labels, threshold=threshold) + return im + + +def get_color_map_list(num_classes): + """ + Args: + num_classes (int): number of class + Returns: + color_map (list): RGB color list + """ + color_map = num_classes * [0, 0, 0] + for i in range(0, num_classes): + j = 0 + lab = i + while lab: + color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) + color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) + color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) + j += 1 + lab >>= 3 + color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] + return color_map + + +def draw_box(im, np_boxes, labels, threshold=0.5): + """ + Args: + im (PIL.Image.Image): PIL image + np_boxes (np.ndarray): shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + labels (list): labels:['class1', ..., 'classn'] + threshold (float): threshold of box + Returns: + im (PIL.Image.Image): visualized image + """ + draw_thickness = min(im.size) // 320 + draw = ImageDraw.Draw(im) + clsid2color = {} + color_list = get_color_map_list(len(labels)) + expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) + np_boxes = np_boxes[expect_boxes, :] + + for dt in np_boxes: + clsid, bbox, score = int(dt[0]), dt[2:], dt[1] + if clsid not in clsid2color: + clsid2color[clsid] = color_list[clsid] + color = tuple(clsid2color[clsid]) + + if len(bbox) == 4: + xmin, ymin, xmax, ymax = bbox + print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],' + 'right_bottom:[{:.2f},{:.2f}]'.format(int(clsid), score, xmin, ymin, xmax, ymax)) + # draw bbox + draw.line([(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)], + width=draw_thickness, + fill=color) + elif len(bbox) == 8: + x1, y1, x2, y2, x3, y3, x4, y4 = bbox + draw.line([(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)], width=2, fill=color) + xmin = min(x1, x2, x3, x4) + ymin = min(y1, y2, y3, y4) + + # draw label + text = "{} {:.4f}".format(labels[clsid], score) + tw, th = draw.textsize(text) + draw.rectangle([(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color) + draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) + return im + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + return color + + +def visualize_pose(imgfile, + results, + visual_thresh=0.6, + save_name='pose.jpg', + save_dir='output', + returnimg=False, + ids=None): + try: + import matplotlib.pyplot as plt + import matplotlib + plt.switch_backend('agg') + except Exception as e: + raise e + skeletons, scores = results['keypoint'] + skeletons = np.array(skeletons) + kpt_nums = 17 + if len(skeletons) > 0: + kpt_nums = skeletons.shape[1] + if kpt_nums == 17: #plot coco keypoint + EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 8), (7, 9), (8, 10), (5, 11), (6, 12), + (11, 13), (12, 14), (13, 15), (14, 16), (11, 12)] + else: #plot mpii keypoint + EDGES = [(0, 1), (1, 2), (3, 4), (4, 5), (2, 6), (3, 6), (6, 7), (7, 8), (8, 9), (10, 11), (11, 12), (13, 14), + (14, 15), (8, 12), (8, 13)] + NUM_EDGES = len(EDGES) + + colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \ + [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], 
[0, 0, 255], [85, 0, 255], \ + [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] + cmap = matplotlib.cm.get_cmap('hsv') + plt.figure() + + img = cv2.imread(imgfile) if type(imgfile) == str else imgfile + + color_set = results['colors'] if 'colors' in results else None + + if 'bbox' in results and ids is None: + bboxs = results['bbox'] + for j, rect in enumerate(bboxs): + xmin, ymin, xmax, ymax = rect + color = colors[0] if color_set is None else colors[color_set[j] % len(colors)] + cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 1) + + canvas = img.copy() + for i in range(kpt_nums): + for j in range(len(skeletons)): + if skeletons[j][i, 2] < visual_thresh: + continue + if ids is None: + color = colors[i] if color_set is None else colors[color_set[j] % len(colors)] + else: + color = get_color(ids[j]) + + cv2.circle(canvas, tuple(skeletons[j][i, 0:2].astype('int32')), 2, color, thickness=-1) + + to_plot = cv2.addWeighted(img, 0.3, canvas, 0.7, 0) + fig = matplotlib.pyplot.gcf() + + stickwidth = 2 + + for i in range(NUM_EDGES): + for j in range(len(skeletons)): + edge = EDGES[i] + if skeletons[j][edge[0], 2] < visual_thresh or skeletons[j][edge[1], 2] < visual_thresh: + continue + + cur_canvas = canvas.copy() + X = [skeletons[j][edge[0], 1], skeletons[j][edge[1], 1]] + Y = [skeletons[j][edge[0], 0], skeletons[j][edge[1], 0]] + mX = np.mean(X) + mY = np.mean(Y) + length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5 + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1) + if ids is None: + color = colors[i] if color_set is None else colors[color_set[j] % len(colors)] + else: + color = get_color(ids[j]) + cv2.fillConvexPoly(cur_canvas, polygon, color) + canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0) + if returnimg: + return canvas + save_name = os.path.join(save_dir, os.path.splitext(os.path.basename(imgfile))[0] + '_vis.jpg') + plt.imsave(save_name, canvas[:, :, ::-1]) + print("keypoint visualize image saved to: " + save_name) + plt.close() From 6667867155ce8d05f48ad4e9a2247eedf81c3e4b Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 23 May 2022 08:25:16 +0000 Subject: [PATCH 013/117] fix --- .../image/keypoint_detection/pp-tinypose/module.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/module.py b/modules/image/keypoint_detection/pp-tinypose/module.py index 4c9e920ee..89ac4d994 100644 --- a/modules/image/keypoint_detection/pp-tinypose/module.py +++ b/modules/image/keypoint_detection/pp-tinypose/module.py @@ -41,13 +41,13 @@ from paddlehub.module.module import serving -@moduleinfo(name="pp-tinypose", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="Openpose_body_estimation is a body pose estimation model based on Realtime Multi-Person 2D Pose \ - Estimation using Part Affinity Fields.", - version="1.0.0") +@moduleinfo( + name="pp-tinypose", + type="CV/keypoint_detection", + author="paddlepaddle", + author_email="", + summary="PP-TinyPose is a real-time keypoint detection model optimized by PaddleDetecion for mobile devices.", + version="1.0.0") class PP_TinyPose: """ PP-TinyPose Model. 
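The preprocessing ops and the pose visualizer added in this patch series compose into a small
standalone pipeline. Below is a minimal sketch, assuming it is run from the pp-tinypose module
directory; the image path 'demo.jpg', the 256x192 target size, and the ImageNet-style mean/std
are placeholder choices for illustration, not values taken from the module's infer_cfg.yml.

    import numpy as np

    from preprocess import NormalizeImage, Permute, Resize, preprocess
    from visualize import visualize_pose

    # Chain the ops: decode -> resize -> normalize -> HWC-to-CHW.
    ops = [
        Resize(target_size=[256, 192], keep_ratio=False),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], is_scale=True),
        Permute(),
    ]
    im, im_info = preprocess('demo.jpg', ops)
    print(im.shape, im_info['scale_factor'])  # (3, 256, 192) and the [y, x] resize ratios

    # visualize_pose expects results['keypoint'] = (skeletons, scores), where skeletons has
    # shape [num_person, 17, 3] holding (x, y, confidence) for each COCO joint.
    skeletons = np.zeros((1, 17, 3), dtype='float32')
    canvas = visualize_pose('demo.jpg', {'keypoint': (skeletons, [0.0])},
                            visual_thresh=0.6, returnimg=True)
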
From 35da85b7098943df9484930fc95dc6fb504610c5 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 23 May 2022 08:25:48 +0000 Subject: [PATCH 014/117] fix --- modules/image/keypoint_detection/pp-tinypose/module.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/module.py b/modules/image/keypoint_detection/pp-tinypose/module.py index 89ac4d994..a710fe04f 100644 --- a/modules/image/keypoint_detection/pp-tinypose/module.py +++ b/modules/image/keypoint_detection/pp-tinypose/module.py @@ -51,9 +51,6 @@ class PP_TinyPose: """ PP-TinyPose Model. - - Args: - load_checkpoint(str): Checkpoint save path, default is None. """ def __init__(self): From 2957e638c7490271fabbf4aa46f6510b18ce69bd Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 05:17:07 +0000 Subject: [PATCH 015/117] delete useless files --- .../pp-tinypose/__init__.py | 14 + .../pp-tinypose/benchmark_utils.py | 262 ------------------ .../pp-tinypose/det_keypoint_unite_infer.py | 185 ------------- .../pp-tinypose/det_keypoint_unite_utils.py | 86 ------ .../keypoint_detection/pp-tinypose/infer.py | 155 ++--------- .../pp-tinypose/keypoint_infer.py | 119 ++------ .../keypoint_detection/pp-tinypose/logger.py | 68 ----- .../keypoint_detection/pp-tinypose/module.py | 7 - .../keypoint_detection/pp-tinypose/utils.py | 217 --------------- 9 files changed, 56 insertions(+), 1057 deletions(-) delete mode 100644 modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py delete mode 100644 modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py delete mode 100644 modules/image/keypoint_detection/pp-tinypose/logger.py delete mode 100644 modules/image/keypoint_detection/pp-tinypose/utils.py diff --git a/modules/image/keypoint_detection/pp-tinypose/__init__.py b/modules/image/keypoint_detection/pp-tinypose/__init__.py index 55916b319..4e438baee 100644 --- a/modules/image/keypoint_detection/pp-tinypose/__init__.py +++ b/modules/image/keypoint_detection/pp-tinypose/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import glob import os import sys diff --git a/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py b/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py deleted file mode 100644 index e1dd4ec35..000000000 --- a/modules/image/keypoint_detection/pp-tinypose/benchmark_utils.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import logging -import os -from pathlib import Path - -import paddle -import paddle.inference as paddle_infer - -CUR_DIR = os.path.dirname(os.path.abspath(__file__)) -LOG_PATH_ROOT = f"{CUR_DIR}/../../output" - - -class PaddleInferBenchmark(object): - - def __init__(self, - config, - model_info: dict = {}, - data_info: dict = {}, - perf_info: dict = {}, - resource_info: dict = {}, - **kwargs): - """ - Construct PaddleInferBenchmark Class to format logs. - args: - config(paddle.inference.Config): paddle inference config - model_info(dict): basic model info - {'model_name': 'resnet50' - 'precision': 'fp32'} - data_info(dict): input data info - {'batch_size': 1 - 'shape': '3,224,224' - 'data_num': 1000} - perf_info(dict): performance result - {'preprocess_time_s': 1.0 - 'inference_time_s': 2.0 - 'postprocess_time_s': 1.0 - 'total_time_s': 4.0} - resource_info(dict): - cpu and gpu resources - {'cpu_rss': 100 - 'gpu_rss': 100 - 'gpu_util': 60} - """ - # PaddleInferBenchmark Log Version - self.log_version = "1.0.3" - - # Paddle Version - self.paddle_version = paddle.__version__ - self.paddle_commit = paddle.__git_commit__ - paddle_infer_info = paddle_infer.get_version() - self.paddle_branch = paddle_infer_info.strip().split(': ')[-1] - - # model info - self.model_info = model_info - - # data info - self.data_info = data_info - - # perf info - self.perf_info = perf_info - - try: - # required value - self.model_name = model_info['model_name'] - self.precision = model_info['precision'] - - self.batch_size = data_info['batch_size'] - self.shape = data_info['shape'] - self.data_num = data_info['data_num'] - - self.inference_time_s = round(perf_info['inference_time_s'], 4) - except: - self.print_help() - raise ValueError("Set argument wrong, please check input argument and its type") - - self.preprocess_time_s = perf_info.get('preprocess_time_s', 0) - self.postprocess_time_s = perf_info.get('postprocess_time_s', 0) - self.with_tracker = True if 'tracking_time_s' in perf_info else False - self.tracking_time_s = perf_info.get('tracking_time_s', 0) - self.total_time_s = perf_info.get('total_time_s', 0) - - self.inference_time_s_90 = perf_info.get("inference_time_s_90", "") - self.inference_time_s_99 = perf_info.get("inference_time_s_99", "") - self.succ_rate = perf_info.get("succ_rate", "") - self.qps = perf_info.get("qps", "") - - # conf info - self.config_status = self.parse_config(config) - - # mem info - if isinstance(resource_info, dict): - self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0)) - self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0)) - self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0)) - self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0)) - self.cpu_util = round(resource_info.get('cpu_util', 0), 2) - - self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0)) - self.gpu_util = round(resource_info.get('gpu_util', 0), 2) - self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2) - else: - self.cpu_rss_mb = 0 - self.cpu_vms_mb = 0 - self.cpu_shared_mb = 0 - self.cpu_dirty_mb = 0 - self.cpu_util = 0 - - self.gpu_rss_mb = 0 - self.gpu_util = 0 - self.gpu_mem_util = 0 - - # init benchmark logger - self.benchmark_logger() - - def benchmark_logger(self): - """ - benchmark logger - """ - # remove other logging handler - for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - - # Init logger - FORMAT = '%(asctime)s - %(name)s 
- %(levelname)s - %(message)s' - log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log" - Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True) - logging.basicConfig(level=logging.INFO, - format=FORMAT, - handlers=[ - logging.FileHandler(filename=log_output, mode='w'), - logging.StreamHandler(), - ]) - self.logger = logging.getLogger(__name__) - self.logger.info(f"Paddle Inference benchmark log will be saved to {log_output}") - - def parse_config(self, config) -> dict: - """ - parse paddle predictor config - args: - config(paddle.inference.Config): paddle inference config - return: - config_status(dict): dict style config info - """ - if isinstance(config, paddle_infer.Config): - config_status = {} - config_status['runtime_device'] = "gpu" if config.use_gpu() else "cpu" - config_status['ir_optim'] = config.ir_optim() - config_status['enable_tensorrt'] = config.tensorrt_engine_enabled() - config_status['precision'] = self.precision - config_status['enable_mkldnn'] = config.mkldnn_enabled() - config_status['cpu_math_library_num_threads'] = config.cpu_math_library_num_threads() - elif isinstance(config, dict): - config_status['runtime_device'] = config.get('runtime_device', "") - config_status['ir_optim'] = config.get('ir_optim', "") - config_status['enable_tensorrt'] = config.get('enable_tensorrt', "") - config_status['precision'] = config.get('precision', "") - config_status['enable_mkldnn'] = config.get('enable_mkldnn', "") - config_status['cpu_math_library_num_threads'] = config.get('cpu_math_library_num_threads', "") - else: - self.print_help() - raise ValueError("Set argument config wrong, please check input argument and its type") - return config_status - - def report(self, identifier=None): - """ - print log report - args: - identifier(string): identify log - """ - if identifier: - identifier = f"[{identifier}]" - else: - identifier = "" - - self.logger.info("\n") - self.logger.info("---------------------- Paddle info ----------------------") - self.logger.info(f"{identifier} paddle_version: {self.paddle_version}") - self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}") - self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}") - self.logger.info(f"{identifier} log_api_version: {self.log_version}") - self.logger.info("----------------------- Conf info -----------------------") - self.logger.info(f"{identifier} runtime_device: {self.config_status['runtime_device']}") - self.logger.info(f"{identifier} ir_optim: {self.config_status['ir_optim']}") - self.logger.info(f"{identifier} enable_memory_optim: {True}") - self.logger.info(f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}") - self.logger.info(f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}") - self.logger.info( - f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}") - self.logger.info("----------------------- Model info ----------------------") - self.logger.info(f"{identifier} model_name: {self.model_name}") - self.logger.info(f"{identifier} precision: {self.precision}") - self.logger.info("----------------------- Data info -----------------------") - self.logger.info(f"{identifier} batch_size: {self.batch_size}") - self.logger.info(f"{identifier} input_shape: {self.shape}") - self.logger.info(f"{identifier} data_num: {self.data_num}") - self.logger.info("----------------------- Perf info -----------------------") - self.logger.info( - f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, 
cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%" - ) - self.logger.info( - f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%" - ) - self.logger.info(f"{identifier} total time spent(s): {self.total_time_s}") - - if self.with_tracker: - self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, " - f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, " - f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}, " - f"tracking_time(ms): {round(self.tracking_time_s*1000, 1)}") - else: - self.logger.info(f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, " - f"inference_time(ms): {round(self.inference_time_s*1000, 1)}, " - f"postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}") - if self.inference_time_s_90: - self.looger.info( - f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}" - ) - if self.qps: - self.logger.info(f"{identifier} QPS: {self.qps}") - - def print_help(self): - """ - print function help - """ - print("""Usage: - ==== Print inference benchmark logs. ==== - config = paddle.inference.Config() - model_info = {'model_name': 'resnet50' - 'precision': 'fp32'} - data_info = {'batch_size': 1 - 'shape': '3,224,224' - 'data_num': 1000} - perf_info = {'preprocess_time_s': 1.0 - 'inference_time_s': 2.0 - 'postprocess_time_s': 1.0 - 'total_time_s': 4.0} - resource_info = {'cpu_rss_mb': 100 - 'gpu_rss_mb': 100 - 'gpu_util': 60} - log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info) - log('Test') - """) - - def __call__(self, identifier=None): - """ - __call__ - args: - identifier(string): identify log - """ - self.report(identifier) diff --git a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py index 612f6dd51..7160d86b3 100644 --- a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py +++ b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_infer.py @@ -19,18 +19,12 @@ import numpy as np import paddle import yaml -from benchmark_utils import PaddleInferBenchmark -from det_keypoint_unite_utils import argsparser -from infer import bench_log from infer import Detector -from infer import get_test_images from infer import PredictConfig -from infer import print_arguments from keypoint_infer import KeyPointDetector from keypoint_infer import PredictConfig_KeyPoint from keypoint_postprocess import translate_to_ori_images from preprocess import decode_image -from utils import get_current_memory_mb from visualize import visualize_pose KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'} @@ -49,182 +43,3 @@ def predict_with_given_det(image, det_res, keypoint_detector, keypoint_batch_siz []] keypoint_res['bbox'] = rect_vector return keypoint_res - - -def topdown_unite_predict(detector, topdown_keypoint_detector, image_list, keypoint_batch_size=1, save_res=False): - det_timer = detector.get_timer() - store_res = [] - for i, img_file in enumerate(image_list): - # Decode image in advance in det + pose prediction - det_timer.preprocess_time_s.start() - image, _ = decode_image(img_file, {}) - det_timer.preprocess_time_s.end() - - if FLAGS.run_benchmark: - results = detector.predict_image([image], run_benchmark=True, repeats=10) - - cm, gm, gu = 
get_current_memory_mb() - detector.cpu_mem += cm - detector.gpu_mem += gm - detector.gpu_util += gu - else: - results = detector.predict_image([image], visual=False) - results = detector.filter_box(results, FLAGS.det_threshold) - if results['boxes_num'] > 0: - keypoint_res = predict_with_given_det(image, results, topdown_keypoint_detector, keypoint_batch_size, - FLAGS.run_benchmark) - - if save_res: - save_name = img_file if isinstance(img_file, str) else i - store_res.append( - [save_name, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]]) - else: - results["keypoint"] = [[], []] - keypoint_res = results - if FLAGS.run_benchmark: - cm, gm, gu = get_current_memory_mb() - topdown_keypoint_detector.cpu_mem += cm - topdown_keypoint_detector.gpu_mem += gm - topdown_keypoint_detector.gpu_util += gu - else: - if not os.path.exists(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - visualize_pose(img_file, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, save_dir=FLAGS.output_dir) - if save_res: - """ - 1) store_res: a list of image_data - 2) image_data: [imageid, rects, [keypoints, scores]] - 3) rects: list of rect [xmin, ymin, xmax, ymax] - 4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list - 5) scores: mean of all joint conf - """ - with open("det_keypoint_unite_image_results.json", 'w') as wf: - json.dump(store_res, wf, indent=4) - - -def topdown_unite_predict_video(detector, topdown_keypoint_detector, camera_id, keypoint_batch_size=1, save_res=False): - video_name = 'output.mp4' - if camera_id != -1: - capture = cv2.VideoCapture(camera_id) - else: - capture = cv2.VideoCapture(FLAGS.video_file) - video_name = os.path.split(FLAGS.video_file)[-1] - # Get Video info : resolution, fps, frame count - width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(capture.get(cv2.CAP_PROP_FPS)) - frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) - print("fps: %d, frame_count: %d" % (fps, frame_count)) - - if not os.path.exists(FLAGS.output_dir): - os.makedirs(FLAGS.output_dir) - out_path = os.path.join(FLAGS.output_dir, video_name) - fourcc = cv2.VideoWriter_fourcc(*'mp4v') - writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) - index = 0 - store_res = [] - while (1): - ret, frame = capture.read() - if not ret: - break - index += 1 - print('detect frame: %d' % (index)) - - frame2 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) - - results = detector.predict_image([frame2], visual=False) - results = detector.filter_box(results, FLAGS.det_threshold) - if results['boxes_num'] == 0: - writer.write(frame) - continue - - keypoint_res = predict_with_given_det(frame2, results, topdown_keypoint_detector, keypoint_batch_size, - FLAGS.run_benchmark) - - im = visualize_pose(frame, keypoint_res, visual_thresh=FLAGS.keypoint_threshold, returnimg=True) - if save_res: - store_res.append([index, keypoint_res['bbox'], [keypoint_res['keypoint'][0], keypoint_res['keypoint'][1]]]) - - writer.write(im) - if camera_id != -1: - cv2.imshow('Mask Detection', im) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - writer.release() - print('output_video saved to: {}'.format(out_path)) - if save_res: - """ - 1) store_res: a list of frame_data - 2) frame_data: [frameid, rects, [keypoints, scores]] - 3) rects: list of rect [xmin, ymin, xmax, ymax] - 4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list - 5) scores: mean of all joint conf - """ - with open("det_keypoint_unite_video_results.json", 
'w') as wf: - json.dump(store_res, wf, indent=4) - - -def main(): - deploy_file = os.path.join(FLAGS.det_model_dir, 'infer_cfg.yml') - with open(deploy_file) as f: - yml_conf = yaml.safe_load(f) - arch = yml_conf['arch'] - detector = Detector(FLAGS.det_model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn, - threshold=FLAGS.det_threshold) - - topdown_keypoint_detector = KeyPointDetector(FLAGS.keypoint_model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - batch_size=FLAGS.keypoint_batch_size, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn, - use_dark=FLAGS.use_dark) - keypoint_arch = topdown_keypoint_detector.pred_config.arch - assert KEYPOINT_SUPPORT_MODELS[ - keypoint_arch] == 'keypoint_topdown', 'Detection-Keypoint unite inference only supports topdown models.' - - # predict from video file or camera video stream - if FLAGS.video_file is not None or FLAGS.camera_id != -1: - topdown_unite_predict_video(detector, topdown_keypoint_detector, FLAGS.camera_id, FLAGS.keypoint_batch_size, - FLAGS.save_res) - else: - # predict from image - img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - topdown_unite_predict(detector, topdown_keypoint_detector, img_list, FLAGS.keypoint_batch_size, FLAGS.save_res) - if not FLAGS.run_benchmark: - detector.det_times.info(average=True) - topdown_keypoint_detector.det_times.info(average=True) - else: - mode = FLAGS.run_mode - det_model_dir = FLAGS.det_model_dir - det_model_info = {'model_name': det_model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} - bench_log(detector, img_list, det_model_info, name='Det') - keypoint_model_dir = FLAGS.keypoint_model_dir - keypoint_model_info = { - 'model_name': keypoint_model_dir.strip('/').split('/')[-1], - 'precision': mode.split('_')[-1] - } - bench_log(topdown_keypoint_detector, img_list, keypoint_model_info, FLAGS.keypoint_batch_size, 'KeyPoint') - - -if __name__ == '__main__': - paddle.enable_static() - parser = argsparser() - FLAGS = parser.parse_args() - print_arguments(FLAGS) - FLAGS.device = FLAGS.device.upper() - assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" - - main() diff --git a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py b/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py deleted file mode 100644 index 309c80814..000000000 --- a/modules/image/keypoint_detection/pp-tinypose/det_keypoint_unite_utils.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import argparse -import ast - - -def argsparser(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--det_model_dir", - type=str, - default=None, - help=("Directory include:'model.pdiparams', 'model.pdmodel', " - "'infer_cfg.yml', created by tools/export_model.py."), - required=True) - parser.add_argument("--keypoint_model_dir", - type=str, - default=None, - help=("Directory include:'model.pdiparams', 'model.pdmodel', " - "'infer_cfg.yml', created by tools/export_model.py."), - required=True) - parser.add_argument("--image_file", type=str, default=None, help="Path of image file.") - parser.add_argument("--image_dir", - type=str, - default=None, - help="Dir of image file, `image_file` has a higher priority.") - parser.add_argument("--keypoint_batch_size", - type=int, - default=8, - help=("batch_size for keypoint inference. In detection-keypoint unit" - "inference, the batch size in detection is 1. Then collate det " - "result in batch for keypoint inference.")) - parser.add_argument("--video_file", - type=str, - default=None, - help="Path of video file, `video_file` or `camera_id` has a highest priority.") - parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.") - parser.add_argument("--det_threshold", type=float, default=0.5, help="Threshold of score.") - parser.add_argument("--keypoint_threshold", type=float, default=0.5, help="Threshold of score.") - parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.") - parser.add_argument("--run_mode", - type=str, - default='paddle', - help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") - parser.add_argument("--device", - type=str, - default='cpu', - help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.") - parser.add_argument("--run_benchmark", - type=ast.literal_eval, - default=False, - help="Whether to predict a image_file repeatedly for benchmark") - parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.") - parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.") - parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.") - parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.") - parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.") - parser.add_argument("--trt_calib_mode", - type=bool, - default=False, - help="If the model is produced by TRT offline quantitative " - "calibration, trt_calib_mode need to set True.") - parser.add_argument('--use_dark', - type=ast.literal_eval, - default=True, - help='whether to use darkpose to get better keypoint position predict ') - parser.add_argument('--save_res', - type=bool, - default=False, - help=("whether to save predict results to json file" - "1) store_res: a list of image_data" - "2) image_data: [imageid, rects, [keypoints, scores]]" - "3) rects: list of rect [xmin, ymin, xmax, ymax]" - "4) keypoints: 17(joint numbers)*[x, y, conf], total 51 data in list" - "5) scores: mean of all joint conf")) - return parser diff --git a/modules/image/keypoint_detection/pp-tinypose/infer.py b/modules/image/keypoint_detection/pp-tinypose/infer.py index fe0764e97..8e5fe3f7f 100644 --- a/modules/image/keypoint_detection/pp-tinypose/infer.py +++ b/modules/image/keypoint_detection/pp-tinypose/infer.py @@ -23,7 +23,6 @@ import numpy as np import paddle import 
yaml -from benchmark_utils import PaddleInferBenchmark from keypoint_preprocess import EvalAffine from keypoint_preprocess import expand_crop from keypoint_preprocess import TopDownEvalAffine @@ -38,9 +37,6 @@ from preprocess import preprocess from preprocess import Resize from preprocess import WarpAffine -from utils import argsparser -from utils import get_current_memory_mb -from utils import Timer from visualize import visualize_box # Global dictionary @@ -67,18 +63,6 @@ } -def bench_log(detector, img_list, model_info, batch_size=1, name=None): - mems = { - 'cpu_rss_mb': detector.cpu_mem / len(img_list), - 'gpu_rss_mb': detector.gpu_mem / len(img_list), - 'gpu_util': detector.gpu_util * 100 / len(img_list) - } - perf_info = detector.det_times.report(average=True) - data_info = {'batch_size': batch_size, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']} - log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems) - log(name) - - class Detector(object): """ Args: @@ -132,7 +116,6 @@ def __init__(self, enable_mkldnn=enable_mkldnn, enable_mkldnn_bfloat16=enable_mkldnn_bfloat16, delete_shuffle_pass=delete_shuffle_pass) - self.det_times = Timer() self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 self.batch_size = batch_size self.output_dir = output_dir @@ -228,9 +211,6 @@ def merge_batch_result(self, batch_result): results[k] = np.concatenate(v) return results - def get_timer(self): - return self.det_times - def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True, save_file=None): batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size) results = [] @@ -238,53 +218,28 @@ def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True, start_index = i * self.batch_size end_index = min((i + 1) * self.batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] - if run_benchmark: - # preprocess - inputs = self.preprocess(batch_image_list) # warmup - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() - - # model prediction - result = self.predict(repeats=50) # warmup - self.det_times.inference_time_s.start() - result = self.predict(repeats=repeats) - self.det_times.inference_time_s.end(repeats=repeats) - - # postprocess - result_warmup = self.postprocess(inputs, result) # warmup - self.det_times.postprocess_time_s.start() - result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) - - cm, gm, gu = get_current_memory_mb() - self.cpu_mem += cm - self.gpu_mem += gm - self.gpu_util += gu - else: - # preprocess - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() - - # model prediction - self.det_times.inference_time_s.start() - result = self.predict() - self.det_times.inference_time_s.end() - - # postprocess - self.det_times.postprocess_time_s.start() - result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) - - if visual: - visualize(batch_image_list, - result, - self.pred_config.labels, - output_dir=self.output_dir, - threshold=self.threshold) + # preprocess + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + self.det_times.inference_time_s.start() + result = self.predict() + 
self.det_times.inference_time_s.end() + + # postprocess + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) + + if visual: + visualize(batch_image_list, + result, + self.pred_config.labels, + output_dir=self.output_dir, + threshold=self.threshold) results.append(result) if visual: @@ -626,69 +581,3 @@ def visualize(image_list, result, labels, output_dir='output/', threshold=0.5): out_path = os.path.join(output_dir, img_name) im.save(out_path, quality=95) print("save result to: " + out_path) - - -def print_arguments(args): - print('----------- Running Arguments -----------') - for arg, value in sorted(vars(args).items()): - print('%s: %s' % (arg, value)) - print('------------------------------------------') - - -def main(): - deploy_file = os.path.join(FLAGS.model_dir, 'infer_cfg.yml') - with open(deploy_file) as f: - yml_conf = yaml.safe_load(f) - arch = yml_conf['arch'] - detector_func = 'Detector' - if arch == 'SOLOv2': - detector_func = 'DetectorSOLOv2' - elif arch == 'PicoDet': - detector_func = 'DetectorPicoDet' - - detector = eval(detector_func)(FLAGS.model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - batch_size=FLAGS.batch_size, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn, - enable_mkldnn_bfloat16=FLAGS.enable_mkldnn_bfloat16, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir) - - # predict from video file or camera video stream - if FLAGS.video_file is not None or FLAGS.camera_id != -1: - detector.predict_video(FLAGS.video_file, FLAGS.camera_id) - else: - # predict from image - if FLAGS.image_dir is None and FLAGS.image_file is not None: - assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None" - img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - save_file = os.path.join(FLAGS.output_dir, 'results.json') if FLAGS.save_results else None - detector.predict_image(img_list, FLAGS.run_benchmark, repeats=100, save_file=save_file) - if not FLAGS.run_benchmark: - detector.det_times.info(average=True) - else: - mode = FLAGS.run_mode - model_dir = FLAGS.model_dir - model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} - bench_log(detector, img_list, model_info, name='DET') - - -if __name__ == '__main__': - paddle.enable_static() - parser = argsparser() - FLAGS = parser.parse_args() - print_arguments(FLAGS) - FLAGS.device = FLAGS.device.upper() - assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" - assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device" - - assert not (FLAGS.enable_mkldnn == False and FLAGS.enable_mkldnn_bfloat16 - == True), 'To enable mkldnn bfloat, please turn on both enable_mkldnn and enable_mkldnn_bfloat16' - - main() diff --git a/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py index e782ac1be..5b1206cfd 100644 --- a/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py +++ b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py @@ -33,9 +33,7 @@ from visualize import visualize_pose from paddle.inference import Config from paddle.inference import create_predictor -from utils import argsparser, Timer, get_current_memory_mb 
-from benchmark_utils import PaddleInferBenchmark -from infer import Detector, get_test_images, print_arguments +from infer import Detector # Global dictionary KEYPOINT_SUPPORT_MODELS = {'HigherHRNet': 'keypoint_bottomup', 'HRNet': 'keypoint_topdown'} @@ -169,52 +167,26 @@ def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True) start_index = i * self.batch_size end_index = min((i + 1) * self.batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] - if run_benchmark: - # preprocess - inputs = self.preprocess(batch_image_list) # warmup - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() + # preprocess + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(batch_image_list) + self.det_times.preprocess_time_s.end() + + # model prediction + self.det_times.inference_time_s.start() + result = self.predict() + self.det_times.inference_time_s.end() + + # postprocess + self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(batch_image_list) - # model prediction - result_warmup = self.predict(repeats=repeats) # warmup - self.det_times.inference_time_s.start() - result = self.predict(repeats=repeats) - self.det_times.inference_time_s.end(repeats=repeats) - - # postprocess - result_warmup = self.postprocess(inputs, result) # warmup - self.det_times.postprocess_time_s.start() - result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) - - cm, gm, gu = get_current_memory_mb() - self.cpu_mem += cm - self.gpu_mem += gm - self.gpu_util += gu - - else: - # preprocess - self.det_times.preprocess_time_s.start() - inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() - - # model prediction - self.det_times.inference_time_s.start() - result = self.predict() - self.det_times.inference_time_s.end() - - # postprocess - self.det_times.postprocess_time_s.start() - result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) - - if visual: - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - visualize(batch_image_list, result, visual_thresh=self.threshold, save_dir=self.output_dir) + if visual: + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + visualize(batch_image_list, result, visual_thresh=self.threshold, save_dir=self.output_dir) results.append(result) if visual: @@ -328,54 +300,3 @@ def visualize(image_list, results, visual_thresh=0.6, save_dir='output'): score = scores[i:i + 1] im_results['keypoint'] = [skeleton, score] visualize_pose(image_file, im_results, visual_thresh=visual_thresh, save_dir=save_dir) - - -def main(): - detector = KeyPointDetector(FLAGS.model_dir, - device=FLAGS.device, - run_mode=FLAGS.run_mode, - batch_size=FLAGS.batch_size, - trt_min_shape=FLAGS.trt_min_shape, - trt_max_shape=FLAGS.trt_max_shape, - trt_opt_shape=FLAGS.trt_opt_shape, - trt_calib_mode=FLAGS.trt_calib_mode, - cpu_threads=FLAGS.cpu_threads, - enable_mkldnn=FLAGS.enable_mkldnn, - threshold=FLAGS.threshold, - output_dir=FLAGS.output_dir, - use_dark=FLAGS.use_dark) - - # predict from video file or camera video stream - if FLAGS.video_file is not None or FLAGS.camera_id != -1: - detector.predict_video(FLAGS.video_file, FLAGS.camera_id) - else: - # predict from 
image - img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) - detector.predict_image(img_list, FLAGS.run_benchmark, repeats=10) - if not FLAGS.run_benchmark: - detector.det_times.info(average=True) - else: - mems = { - 'cpu_rss_mb': detector.cpu_mem / len(img_list), - 'gpu_rss_mb': detector.gpu_mem / len(img_list), - 'gpu_util': detector.gpu_util * 100 / len(img_list) - } - perf_info = detector.det_times.report(average=True) - model_dir = FLAGS.model_dir - mode = FLAGS.run_mode - model_info = {'model_name': model_dir.strip('/').split('/')[-1], 'precision': mode.split('_')[-1]} - data_info = {'batch_size': 1, 'shape': "dynamic_shape", 'data_num': perf_info['img_num']} - det_log = PaddleInferBenchmark(detector.config, model_info, data_info, perf_info, mems) - det_log('KeyPoint') - - -if __name__ == '__main__': - paddle.enable_static() - parser = argsparser() - FLAGS = parser.parse_args() - print_arguments(FLAGS) - FLAGS.device = FLAGS.device.upper() - assert FLAGS.device in ['CPU', 'GPU', 'XPU'], "device should be CPU, GPU or XPU" - assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device" - - main() diff --git a/modules/image/keypoint_detection/pp-tinypose/logger.py b/modules/image/keypoint_detection/pp-tinypose/logger.py deleted file mode 100644 index f7a5c5bea..000000000 --- a/modules/image/keypoint_detection/pp-tinypose/logger.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import functools -import logging -import os -import sys - -import paddle.distributed as dist - -__all__ = ['setup_logger'] - -logger_initialized = [] - - -def setup_logger(name="ppdet", output=None): - """ - Initialize logger and set its verbosity level to INFO. - Args: - output (str): a file name or a directory to save log. If None, will not save log file. - If ends with ".txt" or ".log", assumed to be a file name. - Otherwise, logs will be saved to `output/log.txt`. 
- name (str): the root module name of this logger - - Returns: - logging.Logger: a logger - """ - logger = logging.getLogger(name) - if name in logger_initialized: - return logger - - logger.setLevel(logging.INFO) - logger.propagate = False - - formatter = logging.Formatter("[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S") - # stdout logging: master only - local_rank = dist.get_rank() - if local_rank == 0: - ch = logging.StreamHandler(stream=sys.stdout) - ch.setLevel(logging.DEBUG) - ch.setFormatter(formatter) - logger.addHandler(ch) - - # file logging: all workers - if output is not None: - if output.endswith(".txt") or output.endswith(".log"): - filename = output - else: - filename = os.path.join(output, "log.txt") - if local_rank > 0: - filename = filename + ".rank{}".format(local_rank) - os.makedirs(os.path.dirname(filename)) - fh = logging.FileHandler(filename, mode='a') - fh.setLevel(logging.DEBUG) - fh.setFormatter(logging.Formatter()) - logger.addHandler(fh) - logger_initialized.append(name) - return logger diff --git a/modules/image/keypoint_detection/pp-tinypose/module.py b/modules/image/keypoint_detection/pp-tinypose/module.py index a710fe04f..4d1efc82f 100644 --- a/modules/image/keypoint_detection/pp-tinypose/module.py +++ b/modules/image/keypoint_detection/pp-tinypose/module.py @@ -23,19 +23,12 @@ import paddle import yaml from det_keypoint_unite_infer import predict_with_given_det -from infer import bench_log from infer import Detector -from infer import get_test_images -from infer import PredictConfig -from infer import print_arguments from keypoint_infer import KeyPointDetector -from keypoint_infer import PredictConfig_KeyPoint -from keypoint_postprocess import translate_to_ori_images from preprocess import base64_to_cv2 from preprocess import decode_image from visualize import visualize_pose -import paddlehub.vision.transforms as T from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving diff --git a/modules/image/keypoint_detection/pp-tinypose/utils.py b/modules/image/keypoint_detection/pp-tinypose/utils.py deleted file mode 100644 index 4e0b46b77..000000000 --- a/modules/image/keypoint_detection/pp-tinypose/utils.py +++ /dev/null @@ -1,217 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import argparse -import ast -import os -import time - - -def argsparser(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--model_dir", - type=str, - default=None, - help=("Directory include:'model.pdiparams', 'model.pdmodel', " - "'infer_cfg.yml', created by tools/export_model.py."), - required=True) - parser.add_argument("--image_file", type=str, default=None, help="Path of image file.") - parser.add_argument("--image_dir", - type=str, - default=None, - help="Dir of image file, `image_file` has a higher priority.") - parser.add_argument("--batch_size", type=int, default=1, help="batch_size for inference.") - parser.add_argument("--video_file", - type=str, - default=None, - help="Path of video file, `video_file` or `camera_id` has a highest priority.") - parser.add_argument("--camera_id", type=int, default=-1, help="device id of camera to predict.") - parser.add_argument("--threshold", type=float, default=0.5, help="Threshold of score.") - parser.add_argument("--output_dir", type=str, default="output", help="Directory of output visualization files.") - parser.add_argument("--run_mode", - type=str, - default='paddle', - help="mode of running(paddle/trt_fp32/trt_fp16/trt_int8)") - parser.add_argument("--device", - type=str, - default='cpu', - help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.") - parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Deprecated, please use `--device`.") - parser.add_argument("--run_benchmark", - type=ast.literal_eval, - default=False, - help="Whether to predict a image_file repeatedly for benchmark") - parser.add_argument("--enable_mkldnn", type=ast.literal_eval, default=False, help="Whether use mkldnn with CPU.") - parser.add_argument("--enable_mkldnn_bfloat16", - type=ast.literal_eval, - default=False, - help="Whether use mkldnn bfloat16 inference with CPU.") - parser.add_argument("--cpu_threads", type=int, default=1, help="Num of threads with CPU.") - parser.add_argument("--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.") - parser.add_argument("--trt_max_shape", type=int, default=1280, help="max_shape for TensorRT.") - parser.add_argument("--trt_opt_shape", type=int, default=640, help="opt_shape for TensorRT.") - parser.add_argument("--trt_calib_mode", - type=bool, - default=False, - help="If the model is produced by TRT offline quantitative " - "calibration, trt_calib_mode need to set True.") - parser.add_argument('--save_images', action='store_true', help='Save visualization image results.') - parser.add_argument('--save_mot_txts', action='store_true', help='Save tracking results (txt).') - parser.add_argument('--save_mot_txt_per_img', - action='store_true', - help='Save tracking results (txt) for each image.') - parser.add_argument('--scaled', - type=bool, - default=False, - help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 " - "True in general detector.") - parser.add_argument("--tracker_config", type=str, default=None, help=("tracker donfig")) - parser.add_argument("--reid_model_dir", - type=str, - default=None, - help=("Directory include:'model.pdiparams', 'model.pdmodel', " - "'infer_cfg.yml', created by tools/export_model.py.")) - parser.add_argument("--reid_batch_size", type=int, default=50, help="max batch_size for reid model inference.") - parser.add_argument('--use_dark', - type=ast.literal_eval, - default=True, - help='whether to use darkpose to get better keypoint position predict ') - 
parser.add_argument("--action_file", type=str, default=None, help="Path of input file for action recognition.") - parser.add_argument("--window_size", - type=int, - default=50, - help="Temporal size of skeleton feature for action recognition.") - parser.add_argument("--random_pad", - type=ast.literal_eval, - default=False, - help="Whether do random padding for action recognition.") - parser.add_argument("--save_results", - type=bool, - default=False, - help="Whether save detection result to file using coco format") - - return parser - - -class Times(object): - - def __init__(self): - self.time = 0. - # start time - self.st = 0. - # end time - self.et = 0. - - def start(self): - self.st = time.time() - - def end(self, repeats=1, accumulative=True): - self.et = time.time() - if accumulative: - self.time += (self.et - self.st) / repeats - else: - self.time = (self.et - self.st) / repeats - - def reset(self): - self.time = 0. - self.st = 0. - self.et = 0. - - def value(self): - return round(self.time, 4) - - -class Timer(Times): - - def __init__(self, with_tracker=False): - super(Timer, self).__init__() - self.with_tracker = with_tracker - self.preprocess_time_s = Times() - self.inference_time_s = Times() - self.postprocess_time_s = Times() - self.tracking_time_s = Times() - self.img_num = 0 - - def info(self, average=False): - pre_time = self.preprocess_time_s.value() - infer_time = self.inference_time_s.value() - post_time = self.postprocess_time_s.value() - track_time = self.tracking_time_s.value() - - total_time = pre_time + infer_time + post_time - if self.with_tracker: - total_time = total_time + track_time - total_time = round(total_time, 4) - print("------------------ Inference Time Info ----------------------") - print("total_time(ms): {}, img_num: {}".format(total_time * 1000, self.img_num)) - preprocess_time = round(pre_time / max(1, self.img_num), 4) if average else pre_time - postprocess_time = round(post_time / max(1, self.img_num), 4) if average else post_time - inference_time = round(infer_time / max(1, self.img_num), 4) if average else infer_time - tracking_time = round(track_time / max(1, self.img_num), 4) if average else track_time - - average_latency = total_time / max(1, self.img_num) - qps = 0 - if total_time > 0: - qps = 1 / average_latency - print("average latency time(ms): {:.2f}, QPS: {:2f}".format(average_latency * 1000, qps)) - if self.with_tracker: - print( - "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}, tracking_time(ms): {:.2f}" - .format(preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000, tracking_time * 1000)) - else: - print("preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".format( - preprocess_time * 1000, inference_time * 1000, postprocess_time * 1000)) - - def report(self, average=False): - dic = {} - pre_time = self.preprocess_time_s.value() - infer_time = self.inference_time_s.value() - post_time = self.postprocess_time_s.value() - track_time = self.tracking_time_s.value() - - dic['preprocess_time_s'] = round(pre_time / max(1, self.img_num), 4) if average else pre_time - dic['inference_time_s'] = round(infer_time / max(1, self.img_num), 4) if average else infer_time - dic['postprocess_time_s'] = round(post_time / max(1, self.img_num), 4) if average else post_time - dic['img_num'] = self.img_num - total_time = pre_time + infer_time + post_time - if self.with_tracker: - dic['tracking_time_s'] = round(track_time / max(1, self.img_num), 4) if average else 
track_time - total_time = total_time + track_time - dic['total_time_s'] = round(total_time, 4) - return dic - - -def get_current_memory_mb(): - """ - It is used to Obtain the memory usage of the CPU and GPU during the running of the program. - And this function Current program is time-consuming. - """ - import pynvml - import psutil - import GPUtil - gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0)) - - pid = os.getpid() - p = psutil.Process(pid) - info = p.memory_full_info() - cpu_mem = info.uss / 1024. / 1024. - gpu_mem = 0 - gpu_percent = 0 - gpus = GPUtil.getGPUs() - if gpu_id is not None and len(gpus) > 0: - gpu_percent = gpus[gpu_id].load - pynvml.nvmlInit() - handle = pynvml.nvmlDeviceGetHandleByIndex(0) - meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) - gpu_mem = meminfo.used / 1024. / 1024. - return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4) From 56d597517d450f7ff5cfc87ea33042ca2f914330 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 05:23:56 +0000 Subject: [PATCH 016/117] fix --- modules/image/keypoint_detection/pp-tinypose/infer.py | 8 +------- .../keypoint_detection/pp-tinypose/keypoint_infer.py | 10 ---------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/infer.py b/modules/image/keypoint_detection/pp-tinypose/infer.py index 8e5fe3f7f..d9b838ba5 100644 --- a/modules/image/keypoint_detection/pp-tinypose/infer.py +++ b/modules/image/keypoint_detection/pp-tinypose/infer.py @@ -219,20 +219,14 @@ def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True, end_index = min((i + 1) * self.batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] # preprocess - self.det_times.preprocess_time_s.start() inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() # model prediction - self.det_times.inference_time_s.start() result = self.predict() - self.det_times.inference_time_s.end() # postprocess - self.det_times.postprocess_time_s.start() + result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) if visual: visualize(batch_image_list, diff --git a/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py index 5b1206cfd..7ce8fb2ab 100644 --- a/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py +++ b/modules/image/keypoint_detection/pp-tinypose/keypoint_infer.py @@ -91,7 +91,6 @@ def set_config(self, model_dir): def get_person_from_rect(self, image, results): # crop the person result from image - self.det_times.preprocess_time_s.start() valid_rects = results['boxes'] rect_images = [] new_rects = [] @@ -103,7 +102,6 @@ def get_person_from_rect(self, image, results): rect_images.append(rect_image) new_rects.append(new_rect) org_rects.append(org_rect) - self.det_times.preprocess_time_s.end() return rect_images, new_rects, org_rects def postprocess(self, inputs, result): @@ -168,20 +166,12 @@ def predict_image(self, image_list, run_benchmark=False, repeats=1, visual=True) end_index = min((i + 1) * self.batch_size, len(image_list)) batch_image_list = image_list[start_index:end_index] # preprocess - self.det_times.preprocess_time_s.start() inputs = self.preprocess(batch_image_list) - self.det_times.preprocess_time_s.end() # model prediction - self.det_times.inference_time_s.start() result = self.predict() - self.det_times.inference_time_s.end() - # postprocess - 
self.det_times.postprocess_time_s.start() result = self.postprocess(inputs, result) - self.det_times.postprocess_time_s.end() - self.det_times.img_num += len(batch_image_list) if visual: if not os.path.exists(self.output_dir): From 69e88c8b9165cc6badd0173771883f7b201b37a1 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 05:25:06 +0000 Subject: [PATCH 017/117] fix --- modules/image/keypoint_detection/pp-tinypose/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/__init__.py b/modules/image/keypoint_detection/pp-tinypose/__init__.py index 4e438baee..305c722c9 100644 --- a/modules/image/keypoint_detection/pp-tinypose/__init__.py +++ b/modules/image/keypoint_detection/pp-tinypose/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 6cab8068c81a163113376627dff0ea880f2da318 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 12:01:49 +0000 Subject: [PATCH 018/117] update model --- modules/image/keypoint_detection/pp-tinypose/README.md | 2 +- modules/image/keypoint_detection/pp-tinypose/module.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/README.md b/modules/image/keypoint_detection/pp-tinypose/README.md index 7c11c495c..dcb359c9c 100644 --- a/modules/image/keypoint_detection/pp-tinypose/README.md +++ b/modules/image/keypoint_detection/pp-tinypose/README.md @@ -20,7 +20,7 @@
输入图像
- +
输出图像 diff --git a/modules/image/keypoint_detection/pp-tinypose/module.py b/modules/image/keypoint_detection/pp-tinypose/module.py index 4d1efc82f..c0aa6fd88 100644 --- a/modules/image/keypoint_detection/pp-tinypose/module.py +++ b/modules/image/keypoint_detection/pp-tinypose/module.py @@ -48,7 +48,7 @@ class PP_TinyPose: def __init__(self): self.det_model_dir = os.path.join(self.directory, 'model/picodet_s_320_coco_lcnet/') - self.keypoint_model_dir = os.path.join(self.directory, 'model/dark_hrnet_w32_256x192/') + self.keypoint_model_dir = os.path.join(self.directory, 'model/tinypose_256x192/') self.detector = Detector(self.det_model_dir) self.topdown_keypoint_detector = KeyPointDetector(self.keypoint_model_dir) From b4931eaf28c3e9662bc64cfd2f62e9fa10008ad0 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 12:22:08 +0000 Subject: [PATCH 019/117] update --- modules/image/keypoint_detection/pp-tinypose/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/README.md b/modules/image/keypoint_detection/pp-tinypose/README.md index dcb359c9c..caa247450 100644 --- a/modules/image/keypoint_detection/pp-tinypose/README.md +++ b/modules/image/keypoint_detection/pp-tinypose/README.md @@ -3,7 +3,7 @@ |模型名称|pp-tinypose| | :--- | :---: | |类别|图像-关键点检测| -|网络|PicoDet + HRNet| +|网络|PicoDet + tinypose| |数据集|COCO + AI Challenger| |是否支持Fine-tuning|否| |模型大小|125M| From cc9d7d8811e46ae8d7e4414eefff8ae9bcc94baa Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 24 May 2022 12:43:54 +0000 Subject: [PATCH 020/117] fix --- modules/image/keypoint_detection/pp-tinypose/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/image/keypoint_detection/pp-tinypose/README.md b/modules/image/keypoint_detection/pp-tinypose/README.md index caa247450..8b904cbce 100644 --- a/modules/image/keypoint_detection/pp-tinypose/README.md +++ b/modules/image/keypoint_detection/pp-tinypose/README.md @@ -26,7 +26,7 @@ - ### 模型介绍 - - PP-TinyPose是PaddleDetecion针对移动端设备优化的实时关键点检测模型,可流畅地在移动端设备上执行多人姿态估计任务。借助PaddleDetecion自研的优秀轻量级检测模型PicoDet以及轻量级姿态估计任务骨干网络HRNet, 结合多种策略有效平衡了模型的速度和精度表现。 + - PP-TinyPose是PaddleDetecion针对移动端设备优化的实时关键点检测模型,可流畅地在移动端设备上执行多人姿态估计任务。借助PaddleDetecion自研的优秀轻量级检测模型PicoDet以及轻量级姿态估计任务骨干网络Tinypose, 结合多种策略有效平衡了模型的速度和精度表现。 - 更多详情参考:[PP-TinyPose](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/keypoint/tiny_pose)。 From a1b17d6bb7b37673653f458dd7954e2fba8aae58 Mon Sep 17 00:00:00 2001 From: linjieccc <623543001@qq.com> Date: Tue, 28 Jun 2022 07:08:12 +0000 Subject: [PATCH 021/117] fix ernie_v2_eng_base --- .../ernie_v2_eng_base/README.md | 16 ++++--- .../ernie_v2_eng_base/module.py | 46 ++++++++++--------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/modules/text/language_model/ernie_v2_eng_base/README.md b/modules/text/language_model/ernie_v2_eng_base/README.md index 69309c9aa..404cebeca 100644 --- a/modules/text/language_model/ernie_v2_eng_base/README.md +++ b/modules/text/language_model/ernie_v2_eng_base/README.md @@ -1,12 +1,12 @@ # ernie_v2_eng_base |模型名称|ernie_v2_eng_base| -| :--- | :---: | +| :--- | :---: | |类别|文本-语义模型| |网络|ernie_v2_eng_base| |数据集|百度自建数据集| |是否支持Fine-tuning|是| |模型大小|1.3G| -|最新更新日期|2021-03-16| +|最新更新日期|2021-06-28| |数据指标|-| ## 一、模型基本信息 @@ -36,7 +36,7 @@ Ernie是百度提出的基于知识增强的持续学习语义理解模型,该 - ### 2、安装 - ```shell - $ hub install ernie_tiny + $ hub install ernie_v2_eng_base ``` - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | 
[零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) @@ -57,7 +57,7 @@ label_map = {0: 'negative', 1: 'positive'} model = hub.Module( name='ernie_v2_eng_base', - version='2.0.2', + version='2.0.3', task='seq-cls', load_checkpoint='/path/to/parameters', label_map=label_map) @@ -194,7 +194,11 @@ for idx, text in enumerate(data): * 2.0.2 - 增加文本匹配任务`text-matching` + 增加文本匹配任务`text-matching` + +* 2.0.3 + + 模型底座名称调整 ```shell - $ hub install ernie_v2_eng_base==2.0.2 + $ hub install ernie_v2_eng_base==2.0.3 ``` diff --git a/modules/text/language_model/ernie_v2_eng_base/module.py b/modules/text/language_model/ernie_v2_eng_base/module.py index 3206ed7d0..eb5bcc7e5 100644 --- a/modules/text/language_model/ernie_v2_eng_base/module.py +++ b/modules/text/language_model/ernie_v2_eng_base/module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict -import os import math +import os +from typing import Dict import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddlenlp.transformers.ernie.modeling import ErnieModel, ErnieForSequenceClassification, ErnieForTokenClassification -from paddlenlp.transformers.ernie.tokenizer import ErnieTokenizer from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers.ernie.modeling import ErnieForSequenceClassification +from paddlenlp.transformers.ernie.modeling import ErnieForTokenClassification +from paddlenlp.transformers.ernie.modeling import ErnieModel +from paddlenlp.transformers.ernie.tokenizer import ErnieTokenizer + from paddlehub.module.module import moduleinfo from paddlehub.module.nlp_module import TransformerModule from paddlehub.utils.log import logger @@ -29,7 +31,7 @@ @moduleinfo( name="ernie_v2_eng_base", - version="2.0.2", + version="2.0.3", summary= "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. 
The module is executed as paddle.dygraph.", author="paddlepaddle", @@ -42,13 +44,13 @@ class ErnieV2(nn.Layer): """ def __init__( - self, - task: str = None, - load_checkpoint: str = None, - label_map: Dict = None, - num_classes: int = 2, - suffix: bool = False, - **kwargs, + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, ): super(ErnieV2, self).__init__() if label_map: @@ -64,22 +66,24 @@ def __init__( "'sequence_classification' has been deprecated and will be removed in the future.", ) if task == 'seq-cls': self.model = ErnieForSequenceClassification.from_pretrained( - pretrained_model_name_or_path='ernie-2.0-en', num_classes=self.num_classes, **kwargs) + pretrained_model_name_or_path='ernie-2.0-base-en', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task == 'token-cls': - self.model = ErnieForTokenClassification.from_pretrained( - pretrained_model_name_or_path='ernie-2.0-en', num_classes=self.num_classes, **kwargs) + self.model = ErnieForTokenClassification.from_pretrained(pretrained_model_name_or_path='ernie-2.0-base-en', + num_classes=self.num_classes, + **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) elif task == 'text-matching': - self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs) + self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-base-en', **kwargs) self.dropout = paddle.nn.Dropout(0.1) self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task is None: - self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs) + self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-base-en', **kwargs) else: raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) @@ -171,4 +175,4 @@ def get_tokenizer(*args, **kwargs): """ Gets the tokenizer that is customized for this module. """ - return ErnieTokenizer.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', *args, **kwargs) + return ErnieTokenizer.from_pretrained(pretrained_model_name_or_path='ernie-2.0-base-en', *args, **kwargs) From 8468e1ac6cfe165aa1e3cf4f77ab6fb66ce98614 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Wed, 6 Jul 2022 10:30:01 +0800 Subject: [PATCH 022/117] Remove fluid api in modules and pkg. 
(#1906) --- .../stylepro_artistic/README.md | 6 +- .../stylepro_artistic/README_en.md | 6 +- .../stylepro_artistic/decoder_network.py | 144 -- .../stylepro_artistic/encoder_network.py | 173 --- .../stylepro_artistic/module.py | 164 ++- .../resnet50_vd_animals/README.md | 14 +- .../resnet50_vd_animals/README_en.md | 7 + .../resnet50_vd_animals/module.py | 88 +- .../resnet50_vd_animals/resnet_vd.py | 185 --- .../pyramidbox_lite_mobile/README.md | 6 +- .../pyramidbox_lite_mobile/README_en.md | 6 +- .../pyramidbox_lite_mobile/module.py | 140 +- .../pyramidbox_lite_mobile_mask/README.md | 7 +- .../pyramidbox_lite_mobile_mask/README_en.md | 7 +- .../pyramidbox_lite_mobile_mask/module.py | 127 +- .../pyramidbox_lite_server/README.md | 7 +- .../pyramidbox_lite_server/README_en.md | 7 +- .../pyramidbox_lite_server/module.py | 139 +- .../pyramidbox_lite_server_mask/README.md | 7 +- .../pyramidbox_lite_server_mask/README_en.md | 7 +- .../pyramidbox_lite_server_mask/module.py | 203 ++- .../README.md | 7 +- .../README_en.md | 7 +- .../face_detector.py | 1185 ---------------- .../module.py | 128 +- .../README.md | 7 +- .../README_en.md | 7 +- .../face_detector.py | 1186 ----------------- .../module.py | 128 +- .../face_landmark_localization/README.md | 27 +- .../face_landmark.py | 99 -- .../face_landmark_localization/module.py | 108 +- .../ssd_mobilenet_v1_pascal/README.md | 6 +- .../ssd_mobilenet_v1_pascal/README_en.md | 6 +- .../ssd_mobilenet_v1_pascal/mobilenet_v1.py | 211 --- .../ssd_mobilenet_v1_pascal/module.py | 277 ++-- .../ssd_mobilenet_v1_pascal/processor.py | 45 +- .../yolov3_darknet53_pedestrian/README.md | 6 +- .../yolov3_darknet53_pedestrian/README_en.md | 6 +- .../yolov3_darknet53_pedestrian/darknet.py | 170 --- .../yolov3_darknet53_pedestrian/module.py | 295 ++-- .../yolov3_darknet53_pedestrian/processor.py | 42 +- .../yolov3_darknet53_pedestrian/yolo_head.py | 273 ---- .../yolov3_darknet53_vehicles/README.md | 6 +- .../yolov3_darknet53_vehicles/README_en.md | 6 +- .../yolov3_darknet53_vehicles/darknet.py | 170 --- .../yolov3_darknet53_vehicles/module.py | 219 +-- .../yolov3_darknet53_vehicles/yolo_head.py | 273 ---- .../deeplabv3p_xception65_humanseg/README.md | 14 +- .../README_en.md | 18 +- .../deeplabv3p_xception65_humanseg/module.py | 127 +- .../chinese_ocr_db_crnn_mobile/README.md | 18 +- .../chinese_ocr_db_crnn_mobile/README_en.md | 6 +- .../chinese_ocr_db_crnn_mobile/module.py | 153 ++- .../chinese_ocr_db_crnn_server/README.md | 18 +- .../chinese_ocr_db_crnn_server/module.py | 153 ++- .../README.md | 20 +- .../module.py | 92 +- .../README.md | 20 +- .../module.py | 92 +- .../language_model/albert-base-v1/README.md | 4 +- .../text/language_model/simnet_bow/README.md | 12 +- .../text/language_model/simnet_bow/module.py | 191 +-- .../language_model/simnet_bow/processor.py | 3 +- .../lexical_analysis/jieba_paddle/README.md | 19 +- .../lexical_analysis/jieba_paddle/module.py | 35 +- modules/text/lexical_analysis/lac/README.md | 43 +- modules/text/lexical_analysis/lac/module.py | 295 ++-- .../text/lexical_analysis/lac/processor.py | 6 +- .../ernie_skep_sentiment_analysis/README.md | 22 +- .../model/ernie.py | 254 ---- .../model/transformer_encoder.py | 450 ------- .../ernie_skep_sentiment_analysis/module.py | 63 +- .../sentiment_analysis/senta_bilstm/README.md | 27 +- .../senta_bilstm/README_en.md | 31 +- .../sentiment_analysis/senta_bilstm/module.py | 154 +-- .../sentiment_analysis/senta_bilstm/net.py | 31 - .../sentiment_analysis/senta_bow/README.md | 22 +- 
.../sentiment_analysis/senta_bow/module.py | 155 +-- .../text/sentiment_analysis/senta_bow/net.py | 22 - .../sentiment_analysis/senta_cnn/README.md | 28 +- .../sentiment_analysis/senta_cnn/module.py | 145 +- .../text/sentiment_analysis/senta_cnn/net.py | 21 - .../sentiment_analysis/senta_gru/README.md | 24 +- .../sentiment_analysis/senta_gru/module.py | 145 +- .../text/sentiment_analysis/senta_gru/net.py | 23 - .../sentiment_analysis/senta_lstm/README.md | 30 +- .../sentiment_analysis/senta_lstm/module.py | 140 +- .../text/sentiment_analysis/senta_lstm/net.py | 22 - .../text_review/porn_detection_lstm/README.md | 59 +- .../text_review/porn_detection_lstm/module.py | 84 +- paddlehub/compat/module/module_v1.py | 62 +- paddlehub/compat/module/nlp_module.py | 146 +- paddlehub/compat/paddle_utils.py | 27 +- paddlehub/compat/task/base_task.py | 124 +- paddlehub/module/module.py | 90 +- 96 files changed, 2148 insertions(+), 7942 deletions(-) delete mode 100644 modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py delete mode 100644 modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py delete mode 100755 modules/image/classification/resnet50_vd_animals/resnet_vd.py delete mode 100644 modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py delete mode 100644 modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py delete mode 100644 modules/image/keypoint_detection/face_landmark_localization/face_landmark.py delete mode 100644 modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py delete mode 100644 modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py delete mode 100644 modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py delete mode 100644 modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py delete mode 100644 modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py delete mode 100644 modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py delete mode 100755 modules/text/sentiment_analysis/senta_bilstm/net.py delete mode 100755 modules/text/sentiment_analysis/senta_bow/net.py delete mode 100755 modules/text/sentiment_analysis/senta_cnn/net.py delete mode 100755 modules/text/sentiment_analysis/senta_gru/net.py delete mode 100755 modules/text/sentiment_analysis/senta_lstm/net.py diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md b/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md index 2d44dae43..2306e21a7 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/README.md @@ -180,8 +180,10 @@ 初始发布 -* 1.0.1 +* 1.0.3 + + 移除 fluid api - ```shell - $ hub install stylepro_artistic==1.0.1 + $ hub install stylepro_artistic==1.0.3 ``` diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md b/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md index 95484165c..c4a53d088 100644 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/README_en.md @@ -179,8 +179,10 @@ First release -* 1.0.1 +* 1.0.3 + + Remove fluid api - ```shell - $ hub install stylepro_artistic==1.0.1 + $ hub install stylepro_artistic==1.0.3 ``` diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py 
b/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py deleted file mode 100644 index 99a67c0aa..000000000 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/decoder_network.py +++ /dev/null @@ -1,144 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def decoder_net(): - x2paddle_22 = fluid.layers.create_parameter( - dtype='float32', shape=[4], name='x2paddle_22', attr='x2paddle_22', default_initializer=Constant(0.0)) - x2paddle_36 = fluid.layers.create_parameter( - dtype='float32', shape=[4], name='x2paddle_36', attr='x2paddle_36', default_initializer=Constant(0.0)) - x2paddle_44 = fluid.layers.create_parameter( - dtype='float32', shape=[4], name='x2paddle_44', attr='x2paddle_44', default_initializer=Constant(0.0)) - x2paddle_input_1 = fluid.layers.data( - dtype='float32', shape=[1, 512, 64, 64], name='x2paddle_input_1', append_batch_size=False) - x2paddle_19 = fluid.layers.pad2d( - x2paddle_input_1, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_19') - x2paddle_20 = fluid.layers.conv2d( - x2paddle_19, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_20', - bias_attr='x2paddle_2') - x2paddle_21 = fluid.layers.relu(x2paddle_20, name='x2paddle_21') - x2paddle_23 = fluid.layers.resize_nearest(x2paddle_21, name='x2paddle_23', out_shape=[128, 128]) - x2paddle_24 = fluid.layers.pad2d( - x2paddle_23, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_24') - x2paddle_25 = fluid.layers.conv2d( - x2paddle_24, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_25', - bias_attr='x2paddle_4') - x2paddle_26 = fluid.layers.relu(x2paddle_25, name='x2paddle_26') - x2paddle_27 = fluid.layers.pad2d( - x2paddle_26, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_27') - x2paddle_28 = fluid.layers.conv2d( - x2paddle_27, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_28', - bias_attr='x2paddle_6') - x2paddle_29 = fluid.layers.relu(x2paddle_28, name='x2paddle_29') - x2paddle_30 = fluid.layers.pad2d( - x2paddle_29, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_30') - x2paddle_31 = fluid.layers.conv2d( - x2paddle_30, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_31', - bias_attr='x2paddle_8') - x2paddle_32 = fluid.layers.relu(x2paddle_31, name='x2paddle_32') - x2paddle_33 = fluid.layers.pad2d( - x2paddle_32, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_33') - x2paddle_34 = fluid.layers.conv2d( - x2paddle_33, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_34', - bias_attr='x2paddle_10') - x2paddle_35 = fluid.layers.relu(x2paddle_34, name='x2paddle_35') - x2paddle_37 = fluid.layers.resize_nearest(x2paddle_35, name='x2paddle_37', out_shape=[256, 256]) - x2paddle_38 = fluid.layers.pad2d( - x2paddle_37, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_38') - x2paddle_39 = fluid.layers.conv2d( - x2paddle_38, - num_filters=128, - filter_size=[3, 3], - 
stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_39', - bias_attr='x2paddle_12') - x2paddle_40 = fluid.layers.relu(x2paddle_39, name='x2paddle_40') - x2paddle_41 = fluid.layers.pad2d( - x2paddle_40, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_41') - x2paddle_42 = fluid.layers.conv2d( - x2paddle_41, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_42', - bias_attr='x2paddle_14') - x2paddle_43 = fluid.layers.relu(x2paddle_42, name='x2paddle_43') - x2paddle_45 = fluid.layers.resize_nearest(x2paddle_43, name='x2paddle_45', out_shape=[512, 512]) - x2paddle_46 = fluid.layers.pad2d( - x2paddle_45, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_46') - x2paddle_47 = fluid.layers.conv2d( - x2paddle_46, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_47', - bias_attr='x2paddle_16') - x2paddle_48 = fluid.layers.relu(x2paddle_47, name='x2paddle_48') - x2paddle_49 = fluid.layers.pad2d( - x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') - x2paddle_50 = fluid.layers.conv2d( - x2paddle_49, - num_filters=3, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_17', - name='x2paddle_50', - bias_attr='x2paddle_18') - return x2paddle_input_1, x2paddle_50 diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py b/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py deleted file mode 100644 index 0bff785c6..000000000 --- a/modules/image/Image_gan/style_transfer/stylepro_artistic/encoder_network.py +++ /dev/null @@ -1,173 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def encoder_net(): - x2paddle_0 = fluid.layers.data(dtype='float32', shape=[1, 3, 512, 512], name='x2paddle_0', append_batch_size=False) - x2paddle_21 = fluid.layers.conv2d( - x2paddle_0, - num_filters=3, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_1', - name='x2paddle_21', - bias_attr='x2paddle_2') - x2paddle_22 = fluid.layers.pad2d( - x2paddle_21, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_22') - x2paddle_23 = fluid.layers.conv2d( - x2paddle_22, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_3', - name='x2paddle_23', - bias_attr='x2paddle_4') - x2paddle_24 = fluid.layers.relu(x2paddle_23, name='x2paddle_24') - x2paddle_25 = fluid.layers.pad2d( - x2paddle_24, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_25') - x2paddle_26 = fluid.layers.conv2d( - x2paddle_25, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_5', - name='x2paddle_26', - bias_attr='x2paddle_6') - x2paddle_27 = fluid.layers.relu(x2paddle_26, name='x2paddle_27') - x2paddle_28 = fluid.layers.pool2d( - x2paddle_27, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_28', - exclusive=False) - x2paddle_29 = fluid.layers.pad2d( - x2paddle_28, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 
1], name='x2paddle_29') - x2paddle_30 = fluid.layers.conv2d( - x2paddle_29, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_7', - name='x2paddle_30', - bias_attr='x2paddle_8') - x2paddle_31 = fluid.layers.relu(x2paddle_30, name='x2paddle_31') - x2paddle_32 = fluid.layers.pad2d( - x2paddle_31, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_32') - x2paddle_33 = fluid.layers.conv2d( - x2paddle_32, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_9', - name='x2paddle_33', - bias_attr='x2paddle_10') - x2paddle_34 = fluid.layers.relu(x2paddle_33, name='x2paddle_34') - x2paddle_35 = fluid.layers.pool2d( - x2paddle_34, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_35', - exclusive=False) - x2paddle_36 = fluid.layers.pad2d( - x2paddle_35, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_36') - x2paddle_37 = fluid.layers.conv2d( - x2paddle_36, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_11', - name='x2paddle_37', - bias_attr='x2paddle_12') - x2paddle_38 = fluid.layers.relu(x2paddle_37, name='x2paddle_38') - x2paddle_39 = fluid.layers.pad2d( - x2paddle_38, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_39') - x2paddle_40 = fluid.layers.conv2d( - x2paddle_39, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_13', - name='x2paddle_40', - bias_attr='x2paddle_14') - x2paddle_41 = fluid.layers.relu(x2paddle_40, name='x2paddle_41') - x2paddle_42 = fluid.layers.pad2d( - x2paddle_41, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_42') - x2paddle_43 = fluid.layers.conv2d( - x2paddle_42, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_15', - name='x2paddle_43', - bias_attr='x2paddle_16') - x2paddle_44 = fluid.layers.relu(x2paddle_43, name='x2paddle_44') - x2paddle_45 = fluid.layers.pad2d( - x2paddle_44, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_45') - x2paddle_46 = fluid.layers.conv2d( - x2paddle_45, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_17', - name='x2paddle_46', - bias_attr='x2paddle_18') - x2paddle_47 = fluid.layers.relu(x2paddle_46, name='x2paddle_47') - x2paddle_48 = fluid.layers.pool2d( - x2paddle_47, - pool_size=[2, 2], - pool_type='max', - pool_stride=[2, 2], - pool_padding=[0, 0], - ceil_mode=False, - name='x2paddle_48', - exclusive=False) - x2paddle_49 = fluid.layers.pad2d( - x2paddle_48, pad_value=0.0, mode='reflect', paddings=[1, 1, 1, 1], name='x2paddle_49') - x2paddle_50 = fluid.layers.conv2d( - x2paddle_49, - num_filters=512, - filter_size=[3, 3], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='x2paddle_19', - name='x2paddle_50', - bias_attr='x2paddle_20') - x2paddle_51 = fluid.layers.relu(x2paddle_50, name='x2paddle_51') - return x2paddle_0, x2paddle_51 diff --git a/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py b/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py index 001068193..d4aa4d823 100644 --- 
a/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py +++ b/modules/image/Image_gan/style_transfer/stylepro_artistic/module.py @@ -2,32 +2,38 @@ from __future__ import absolute_import from __future__ import division +import argparse import ast import copy -import time import os -import argparse +import time import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from stylepro_artistic.encoder_network import encoder_net -from stylepro_artistic.decoder_network import decoder_net -from stylepro_artistic.processor import postprocess, fr, cv2_to_base64, base64_to_cv2 +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from stylepro_artistic.data_feed import reader +from stylepro_artistic.processor import base64_to_cv2 +from stylepro_artistic.processor import cv2_to_base64 +from stylepro_artistic.processor import fr +from stylepro_artistic.processor import postprocess + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving +# coding=utf-8 @moduleinfo( name="stylepro_artistic", - version="1.0.2", + version="1.0.3", type="cv/style_transfer", summary="StylePro Artistic is an algorithm for Arbitrary image style, which is parameter-free, fast yet effective.", author="baidu-bdl", author_email="") class StyleProjection(hub.Module): + def _initialize(self): self.pretrained_encoder_net = os.path.join(self.directory, "style_projection_enc") self.pretrained_decoder_net = os.path.join(self.directory, "style_projection_dec") @@ -38,15 +44,15 @@ def _set_config(self): predictor config setting """ # encoder - cpu_config_enc = AnalysisConfig(self.pretrained_encoder_net) + cpu_config_enc = Config(self.pretrained_encoder_net) cpu_config_enc.disable_glog_info() cpu_config_enc.disable_gpu() - self.cpu_predictor_enc = create_paddle_predictor(cpu_config_enc) + self.cpu_predictor_enc = create_predictor(cpu_config_enc) # decoder - cpu_config_dec = AnalysisConfig(self.pretrained_decoder_net) + cpu_config_dec = Config(self.pretrained_decoder_net) cpu_config_dec.disable_glog_info() cpu_config_dec.disable_gpu() - self.cpu_predictor_dec = create_paddle_predictor(cpu_config_dec) + self.cpu_predictor_dec = create_predictor(cpu_config_dec) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -56,15 +62,15 @@ def _set_config(self): use_gpu = False if use_gpu: # encoder - gpu_config_enc = AnalysisConfig(self.pretrained_encoder_net) + gpu_config_enc = Config(self.pretrained_encoder_net) gpu_config_enc.disable_glog_info() gpu_config_enc.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor_enc = create_paddle_predictor(gpu_config_enc) + self.gpu_predictor_enc = create_predictor(gpu_config_enc) # decoder - gpu_config_dec = AnalysisConfig(self.pretrained_decoder_net) + gpu_config_dec = Config(self.pretrained_decoder_net) gpu_config_dec.disable_glog_info() gpu_config_dec.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor_dec = create_paddle_predictor(gpu_config_dec) + self.gpu_predictor_dec = create_predictor(gpu_config_dec) def style_transfer(self, images=None, @@ -102,22 +108,36 @@ def style_transfer(self, "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. 
If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." ) + predictor_enc = self.gpu_predictor_enc if use_gpu else self.cpu_predictor_enc + input_names_enc = predictor_enc.get_input_names() + input_handle_enc = predictor_enc.get_input_handle(input_names_enc[0]) + output_names_enc = predictor_enc.get_output_names() + output_handle_enc = predictor_enc.get_output_handle(output_names_enc[0]) + + predictor_dec = self.gpu_predictor_dec if use_gpu else self.cpu_predictor_dec + input_names_dec = predictor_dec.get_input_names() + input_handle_dec = predictor_dec.get_input_handle(input_names_dec[0]) + output_names_dec = predictor_dec.get_output_names() + output_handle_dec = predictor_dec.get_output_handle(output_names_dec[0]) + im_output = [] for component, w, h in reader(images, paths): - content = PaddleTensor(component['content_arr'].copy()) - content_feats = self.gpu_predictor_enc.run([content]) if use_gpu else self.cpu_predictor_enc.run([content]) + input_handle_enc.copy_from_cpu(component['content_arr']) + predictor_enc.run() + content_feats = output_handle_enc.copy_to_cpu() accumulate = np.zeros((3, 512, 512)) for idx, style_arr in enumerate(component['styles_arr_list']): - style = PaddleTensor(style_arr.copy()) # encode - style_feats = self.gpu_predictor_enc.run([style]) if use_gpu else self.cpu_predictor_enc.run([style]) - fr_feats = fr(content_feats[0].as_ndarray(), style_feats[0].as_ndarray(), alpha) - fr_feats = PaddleTensor(fr_feats.copy()) + input_handle_enc.copy_from_cpu(style_arr) + predictor_enc.run() + style_feats = output_handle_enc.copy_to_cpu() + fr_feats = fr(content_feats, style_feats, alpha) # decode - predict_outputs = self.gpu_predictor_dec.run([fr_feats]) if use_gpu else self.cpu_predictor_dec.run( - [fr_feats]) + input_handle_dec.copy_from_cpu(fr_feats) + predictor_dec.run() + predict_outputs = output_handle_dec.copy_to_cpu() # interpolation - accumulate += predict_outputs[0].as_ndarray()[0] * component['style_interpolation_weights'][idx] + accumulate += predict_outputs[0] * component['style_interpolation_weights'][idx] # postprocess save_im_name = 'ndarray_{}.jpg'.format(time.time()) result = postprocess(accumulate, output_dir, save_im_name, visualization, size=(w, h)) @@ -134,39 +154,37 @@ def _save_encode_model(self, dirname, model_filename=None, params_filename=None, if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - encode_program, encode_feeded_var_names, encode_target_vars = fluid.io.load_inference_model( + encode_program, encode_feeded_var_names, encode_target_vars = paddle.static.load_inference_model( dirname=self.pretrained_encoder_net, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=encode_program, - executor=exe, - feeded_var_names=encode_feeded_var_names, - target_vars=encode_target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=encode_program, + executor=exe, + feeded_var_names=encode_feeded_var_names, + target_vars=encode_target_vars, + model_filename=model_filename, + params_filename=params_filename) def _save_decode_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename 
params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - decode_program, decode_feeded_var_names, decode_target_vars = fluid.io.load_inference_model( + decode_program, decode_feeded_var_names, decode_target_vars = paddle.static.load_inference_model( dirname=self.pretrained_decoder_net, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=decode_program, - executor=exe, - feeded_var_names=decode_feeded_var_names, - target_vars=decode_target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=decode_program, + executor=exe, + feeded_var_names=decode_feeded_var_names, + target_vars=decode_target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -186,11 +204,10 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -202,20 +219,29 @@ def run_cmd(self, argvs): paths = [{'content': args.content, 'styles': args.styles.split(',')}] else: paths = [{'content': args.content, 'styles': args.styles.split(','), 'weights': list(args.weights)}] - results = self.style_transfer( - paths=paths, alpha=args.alpha, use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=True) + results = self.style_transfer(paths=paths, + alpha=args.alpha, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=True) return results def add_module_config_arg(self): """ Add the command config options. 
""" - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='transfer_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=True, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='transfer_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=True, + help="whether to save output as images.") def add_module_input_arg(self): """ @@ -223,7 +249,11 @@ def add_module_input_arg(self): """ self.arg_input_group.add_argument('--content', type=str, help="path to content.") self.arg_input_group.add_argument('--styles', type=str, help="path to styles.") - self.arg_input_group.add_argument( - '--weights', type=ast.literal_eval, default=None, help="interpolation weights of styles.") - self.arg_config_group.add_argument( - '--alpha', type=ast.literal_eval, default=1, help="The parameter to control the tranform degree.") + self.arg_input_group.add_argument('--weights', + type=ast.literal_eval, + default=None, + help="interpolation weights of styles.") + self.arg_config_group.add_argument('--alpha', + type=ast.literal_eval, + default=1, + help="The parameter to control the tranform degree.") diff --git a/modules/image/classification/resnet50_vd_animals/README.md b/modules/image/classification/resnet50_vd_animals/README.md index 0b7deba6c..3895f3535 100644 --- a/modules/image/classification/resnet50_vd_animals/README.md +++ b/modules/image/classification/resnet50_vd_animals/README.md @@ -1,7 +1,7 @@ # resnet50_vd_animals |模型名称|resnet50_vd_animals| -| :--- | :---: | +| :--- | :---: | |类别|图像-图像分类| |网络|ResNet50_vd| |数据集|百度自建动物数据集| @@ -31,13 +31,13 @@ - ### 2、安装 - ```shell - $ hub install resnet50_vd_animals==1.0.0 + $ hub install resnet50_vd_animals ``` - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) ## 三、模型API预测 - + - ### 1、命令行预测 - ``` @@ -167,3 +167,11 @@ * 1.0.0 初始发布 + +* 1.0.1 + + 移除 fluid api + + - ```shell + $ hub install resnet50_vd_animals==1.0.1 + ``` diff --git a/modules/image/classification/resnet50_vd_animals/README_en.md b/modules/image/classification/resnet50_vd_animals/README_en.md index 031f469fc..13af67e43 100644 --- a/modules/image/classification/resnet50_vd_animals/README_en.md +++ b/modules/image/classification/resnet50_vd_animals/README_en.md @@ -171,3 +171,10 @@ First release +* 1.0.1 + + Remove fluid api + + - ```shell + $ hub install resnet50_vd_animals==1.0.1 + ``` diff --git a/modules/image/classification/resnet50_vd_animals/module.py b/modules/image/classification/resnet50_vd_animals/module.py index 8c907e12d..e60c82a8d 100644 --- a/modules/image/classification/resnet50_vd_animals/module.py +++ b/modules/image/classification/resnet50_vd_animals/module.py @@ -2,22 +2,23 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub +import paddle from paddle.inference 
import Config from paddle.inference import create_predictor +from resnet50_vd_animals.data_feed import reader +from resnet50_vd_animals.processor import base64_to_cv2 +from resnet50_vd_animals.processor import postprocess -from paddlehub.module.module import moduleinfo, runnable, serving +import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix - -from resnet50_vd_animals.processor import postprocess, base64_to_cv2 -from resnet50_vd_animals.data_feed import reader -from resnet50_vd_animals.resnet_vd import ResNet50_vd +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( @@ -26,8 +27,9 @@ author="baidu-vis", author_email="", summary="ResNet50vd is a image classfication model, this module is trained with Baidu's self-built animals dataset.", - version="1.0.0") + version="1.0.1") class ResNet50vdAnimals(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "model") label_file = os.path.join(self.directory, "label_list.txt") @@ -97,54 +99,6 @@ def _set_config(self): xpu_config.enable_xpu(100) self.xpu_predictor = create_predictor(xpu_config) - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool) : Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding vaule is image tensor. - outputs (dict): key is : - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. - """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") - resnet_vd = ResNet50_vd() - output, feature_map = resnet_vd.net(input=image, class_dim=len(self.label_list)) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = {key: global_vars[value] for key, value in inputs.items()} - outputs = {key: global_vars[value] for key, value in outputs.items()} - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - return b - - fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1, use_device=None): """ API for image classification. 
@@ -215,19 +169,19 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): diff --git a/modules/image/classification/resnet50_vd_animals/resnet_vd.py b/modules/image/classification/resnet50_vd_animals/resnet_vd.py deleted file mode 100755 index 3d9a91ca7..000000000 --- a/modules/image/classification/resnet50_vd_animals/resnet_vd.py +++ /dev/null @@ -1,185 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ["ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, is_3x3=False): - self.params = train_parameters - self.layers = layers - self.is_3x3 = is_3x3 - - def net(self, input, class_dim=1000): - is_3x3 = self.is_3x3 - layers = self.layers - supported_layers = [50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_filters = [64, 128, 256, 512] - if is_3x3 == False: - conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') - else: - conv = self.conv_bn_layer(input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') - conv = self.conv_bn_layer(input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') - conv = self.conv_bn_layer(input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') - - conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) - - pool = fluid.layers.pool2d(input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) - - return out, pool - - def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None): - pool = fluid.layers.pool2d(input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') - - conv = fluid.layers.conv2d( - input=pool, - num_filters=num_filters, - filter_size=filter_size, - stride=1, - padding=(filter_size - 1) // 2, - 
groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def shortcut(self, input, ch_out, stride, name, if_first=False): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - if if_first: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, if_first): - conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu', name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name + "_branch2c") - - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50_vd(): - model = ResNet(layers=50, is_3x3=True) - return model - - -def ResNet101_vd(): - model = ResNet(layers=101, is_3x3=True) - return model - - -def ResNet152_vd(): - model = ResNet(layers=152, is_3x3=True) - return model - - -def ResNet200_vd(): - model = ResNet(layers=200, is_3x3=True) - return model diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/README.md b/modules/image/face_detection/pyramidbox_lite_mobile/README.md index 7a1f984f4..b4fd8b8c8 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README.md @@ -163,8 +163,10 @@ 初始发布 -* 1.2.0 +* 1.2.1 + + 移除 fluid api - ```shell - $ hub install pyramidbox_lite_mobile==1.2.0 + $ hub install pyramidbox_lite_mobile==1.2.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md index 088fd4725..d1439fc22 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md @@ -162,8 +162,10 @@ First release -* 1.2.0 +* 1.2.1 + + Remove fluid api - ```shell - $ hub install pyramidbox_lite_mobile==1.2.0 + $ hub install pyramidbox_lite_mobile==1.2.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/module.py b/modules/image/face_detection/pyramidbox_lite_mobile/module.py index 468aff860..2f6e665e9 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/module.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile/module.py @@ -2,28 +2,32 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from pyramidbox_lite_mobile.data_feed import reader -from pyramidbox_lite_mobile.processor import postprocess, 
base64_to_cv2 +from pyramidbox_lite_mobile.processor import base64_to_cv2 +from pyramidbox_lite_mobile.processor import postprocess +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="pyramidbox_lite_mobile", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary="PyramidBox-Lite-Mobile is a high-performance face detection model.", - version="1.2.0") + +@moduleinfo(name="pyramidbox_lite_mobile", + type="CV/face_detection", + author="baidu-vis", + author_email="", + summary="PyramidBox-Lite-Mobile is a high-performance face detection model.", + version="1.2.1") class PyramidBoxLiteMobile(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_face_detection") self._set_config() @@ -33,10 +37,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -45,10 +49,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def face_detection(self, images=None, @@ -98,18 +102,26 @@ def face_detection(self, # process one by one for element in reader(images, paths, shrink): image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) - out = postprocess( - data_out=data_out[0].as_ndarray(), - org_im=element['org_im'], - org_im_path=element['org_im_path'], - image_width=element['image_width'], - image_height=element['image_height'], - output_dir=output_dir, - visualization=visualization, - shrink=shrink, - confs_threshold=confs_threshold) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + out = postprocess(data_out=output_data, + org_im=element['org_im'], + org_im_path=element['org_im_path'], + image_width=element['image_width'], + image_height=element['image_height'], + output_dir=output_dir, + visualization=visualization, + shrink=shrink, + confs_threshold=confs_threshold) res.append(out) return res @@ -117,23 +129,21 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = 
fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) var = program.global_block().vars['detection_output_0.tmp_1'] - var.desc.set_dtype(fluid.core.VarDesc.VarType.INT32) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -149,36 +159,40 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) + results = self.face_detection(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) return results def add_module_config_arg(self): """ Add the command config options. 
""" - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ @@ -190,5 +204,7 @@ def add_module_input_arg(self): type=ast.literal_eval, default=0.5, help="resize the image to shrink * original_shape before feeding into network.") - self.arg_input_group.add_argument( - '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") + self.arg_input_group.add_argument('--confs_threshold', + type=ast.literal_eval, + default=0.6, + help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md index 434d01b19..458a60e72 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md @@ -208,7 +208,10 @@ 初始发布 -* 1.3.0 +* 1.3.1 + + 移除 fluid api + - ```shell - $ hub install pyramidbox_lite_mobile_mask==1.3.0 + $ hub install pyramidbox_lite_mobile_mask==1.3.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md index 56661fd6f..f7d2ef026 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md @@ -184,7 +184,10 @@ First release -* 1.3.0 +* 1.3.1 + + Remove fluid api + - ```shell - $ hub install pyramidbox_lite_mobile_mask==1.3.0 + $ hub install pyramidbox_lite_mobile_mask==1.3.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py index 73d3c6851..99a09fdd4 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py @@ -2,18 +2,22 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from pyramidbox_lite_mobile_mask.data_feed import reader -from pyramidbox_lite_mobile_mask.processor import postprocess, base64_to_cv2 +from pyramidbox_lite_mobile_mask.processor import base64_to_cv2 +from pyramidbox_lite_mobile_mask.processor import postprocess + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( 
@@ -23,8 +27,9 @@ author_email="", summary= "Pyramidbox-Lite-Mobile-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.0") + version="1.3.1") class PyramidBoxLiteMobileMask(hub.Module): + def _initialize(self, face_detector_module=None): """ Args: @@ -42,10 +47,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -54,10 +59,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def set_face_detector_module(self, face_detector_module): """ @@ -146,12 +151,18 @@ def face_detection(self, pass image_arr = np.squeeze(np.array(batch_data), axis=1) - image_tensor = PaddleTensor(image_arr.copy()) - data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) - # len(data_out) == 1 - # data_out[0].as_ndarray().shape == (-1, 2) - data_out = data_out[0].as_ndarray() - predict_out = np.concatenate((predict_out, data_out)) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_arr) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + predict_out = np.concatenate((predict_out, output_data)) predict_out = predict_out[1:] # postprocess one by one @@ -160,13 +171,12 @@ def face_detection(self, detect_faces_list = [handled['face'] for handled in all_element[i]['preprocessed']] interval_left = sum(element_image_num[0:i]) interval_right = interval_left + element_image_num[i] - out = postprocess( - confidence_out=predict_out[interval_left:interval_right], - org_im=all_element[i]['org_im'], - org_im_path=all_element[i]['org_im_path'], - detected_faces=detect_faces_list, - output_dir=output_dir, - visualization=visualization) + out = postprocess(confidence_out=predict_out[interval_left:interval_right], + org_im=all_element[i]['org_im'], + org_im_path=all_element[i]['org_im_path'], + detected_faces=detect_faces_list, + output_dir=output_dir, + visualization=visualization) res.append(out) return res @@ -183,20 +193,19 @@ def _save_classifier_model(self, dirname, model_filename=None, params_filename=N if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - 
executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -212,36 +221,40 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) + results = self.face_detection(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ @@ -253,5 +266,7 @@ def add_module_input_arg(self): type=ast.literal_eval, default=0.5, help="resize the image to `shrink * original_shape` before feeding into network.") - self.arg_input_group.add_argument( - '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") + self.arg_input_group.add_argument('--confs_threshold', + type=ast.literal_eval, + default=0.6, + help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_server/README.md b/modules/image/face_detection/pyramidbox_lite_server/README.md index b52fb6a74..a0bc69d70 100644 --- a/modules/image/face_detection/pyramidbox_lite_server/README.md +++ b/modules/image/face_detection/pyramidbox_lite_server/README.md @@ -166,6 +166,11 @@ * 1.2.0 修复numpy数据读取问题 + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install pyramidbox_lite_server==1.2.0 + $ hub install pyramidbox_lite_server==1.2.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server/README_en.md 
b/modules/image/face_detection/pyramidbox_lite_server/README_en.md index 92818bf93..ba4d6c0ee 100644 --- a/modules/image/face_detection/pyramidbox_lite_server/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_server/README_en.md @@ -166,6 +166,11 @@ * 1.2.0 Fix the problem of reading numpy + +* 1.2.1 + + Remove fluid api + - ```shell - $ hub install pyramidbox_lite_server==1.2.0 + $ hub install pyramidbox_lite_server==1.2.1 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server/module.py b/modules/image/face_detection/pyramidbox_lite_server/module.py index 617baba6c..3509bea31 100644 --- a/modules/image/face_detection/pyramidbox_lite_server/module.py +++ b/modules/image/face_detection/pyramidbox_lite_server/module.py @@ -2,28 +2,32 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from pyramidbox_lite_server.data_feed import reader -from pyramidbox_lite_server.processor import postprocess, base64_to_cv2 +from pyramidbox_lite_server.processor import base64_to_cv2 +from pyramidbox_lite_server.processor import postprocess +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="pyramidbox_lite_server", - type="CV/face_detection", - author="baidu-vis", - author_email="", - summary="PyramidBox-Lite-Server is a high-performance face detection model.", - version="1.2.0") + +@moduleinfo(name="pyramidbox_lite_server", + type="CV/face_detection", + author="baidu-vis", + author_email="", + summary="PyramidBox-Lite-Server is a high-performance face detection model.", + version="1.2.1") class PyramidBoxLiteServer(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_face_detection") self._set_config() @@ -33,10 +37,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -45,10 +49,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def face_detection(self, images=None, @@ -98,18 +102,26 @@ def face_detection(self, # process one by one for element in reader(images, paths, shrink): image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) - out = postprocess( - data_out=data_out[0].as_ndarray(), - org_im=element['org_im'], - 
org_im_path=element['org_im_path'], - image_width=element['image_width'], - image_height=element['image_height'], - output_dir=output_dir, - visualization=visualization, - shrink=shrink, - confs_threshold=confs_threshold) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + out = postprocess(data_out=output_data, + org_im=element['org_im'], + org_im_path=element['org_im_path'], + image_width=element['image_width'], + image_height=element['image_height'], + output_dir=output_dir, + visualization=visualization, + shrink=shrink, + confs_threshold=confs_threshold) res.append(out) return res @@ -117,20 +129,19 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -146,36 +157,40 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) + results = self.face_detection(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) return results def add_module_config_arg(self): """ Add the command config options. 
""" - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ @@ -187,5 +202,7 @@ def add_module_input_arg(self): type=ast.literal_eval, default=0.5, help="resize the image to shrink * original_shape before feeding into network.") - self.arg_input_group.add_argument( - '--confs_threshold', type=ast.literal_eval, default=0.6, help="confidence threshold.") + self.arg_input_group.add_argument('--confs_threshold', + type=ast.literal_eval, + default=0.6, + help="confidence threshold.") diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md index 1c49c750b..6f21a6ab7 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md @@ -208,7 +208,10 @@ 初始发布 -* 1.3.1 +* 1.3.2 + + 移除 fluid api + - ```shell - $ hub install pyramidbox_lite_server_mask==1.3.1 + $ hub install pyramidbox_lite_server_mask==1.3.2 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md index 7efc0d922..da5ba9e38 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md @@ -185,7 +185,10 @@ First release -* 1.3.1 +* 1.3.2 + + Remove fluid api + - ```shell - $ hub install pyramidbox_lite_server_mask==1.3.1 + $ hub install pyramidbox_lite_server_mask==1.3.2 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/module.py b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py index e4f8eac50..9184fa6f6 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/module.py +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py @@ -2,18 +2,22 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from pyramidbox_lite_server_mask.data_feed import reader -from pyramidbox_lite_server_mask.processor import postprocess, base64_to_cv2 +from pyramidbox_lite_server_mask.processor import base64_to_cv2 +from pyramidbox_lite_server_mask.processor import postprocess + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( 
@@ -23,15 +27,15 @@ author_email="", summary= "PyramidBox-Lite-Server-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.1") + version="1.3.2") class PyramidBoxLiteServerMask(hub.Module): + def _initialize(self, face_detector_module=None): """ Args: face_detector_module (class): module to detect face. """ - self.default_pretrained_model_path = os.path.join( - self.directory, "pyramidbox_lite_server_mask_model") + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_mask_model") if face_detector_module is None: self.face_detector = hub.Module(name='pyramidbox_lite_server') else: @@ -43,10 +47,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -55,11 +59,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu( - memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) def set_face_detector_module(self, face_detector_module): """ @@ -123,16 +126,13 @@ def face_detection(self, # get all data all_element = list() - for yield_data in reader(self.face_detector, shrink, confs_threshold, - images, paths, use_gpu, use_multi_scale): + for yield_data in reader(self.face_detector, shrink, confs_threshold, images, paths, use_gpu, use_multi_scale): all_element.append(yield_data) image_list = list() element_image_num = list() for i in range(len(all_element)): - element_image = [ - handled['image'] for handled in all_element[i]['preprocessed'] - ] + element_image = [handled['image'] for handled in all_element[i]['preprocessed']] element_image_num.append(len(element_image)) image_list.extend(element_image) @@ -150,76 +150,61 @@ def face_detection(self, pass image_arr = np.squeeze(np.array(batch_data), axis=1) - image_tensor = PaddleTensor(image_arr.copy()) - data_out = self.gpu_predictor.run([ - image_tensor - ]) if use_gpu else self.cpu_predictor.run([image_tensor]) - # len(data_out) == 1 - # data_out[0].as_ndarray().shape == (-1, 2) - data_out = data_out[0].as_ndarray() - predict_out = np.concatenate((predict_out, data_out)) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_arr) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + + predict_out = np.concatenate((predict_out, output_data)) predict_out = predict_out[1:] # postprocess one by one res = list() for i in range(len(all_element)): - detect_faces_list = [ - handled['face'] for handled in all_element[i]['preprocessed'] - ] + detect_faces_list = [handled['face'] for handled in all_element[i]['preprocessed']] interval_left = sum(element_image_num[0:i]) interval_right = 
interval_left + element_image_num[i] - out = postprocess( - confidence_out=predict_out[interval_left:interval_right], - org_im=all_element[i]['org_im'], - org_im_path=all_element[i]['org_im_path'], - detected_faces=detect_faces_list, - output_dir=output_dir, - visualization=visualization) + out = postprocess(confidence_out=predict_out[interval_left:interval_right], + org_im=all_element[i]['org_im'], + org_im_path=all_element[i]['org_im_path'], + detected_faces=detect_faces_list, + output_dir=output_dir, + visualization=visualization) res.append(out) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): classifier_dir = os.path.join(dirname, 'mask_detector') detector_dir = os.path.join(dirname, 'pyramidbox_lite') - self._save_classifier_model(classifier_dir, model_filename, - params_filename, combined) - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.face_detector.save_inference_model(dirname, model_filename, - params_filename, combined) - - def _save_classifier_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) + self._save_detector_model(detector_dir, model_filename, params_filename, combined) + + def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): + self.face_detector.save_inference_model(dirname, model_filename, params_filename, combined) + + def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -235,64 +220,52 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - shrink=args.shrink, - confs_threshold=args.confs_threshold) + results = self.face_detection(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + shrink=args.shrink, + confs_threshold=args.confs_threshold) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ Add the command input options. """ - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") self.arg_input_group.add_argument( '--shrink', type=ast.literal_eval, default=0.5, - help= - "resize the image to `shrink * original_shape` before feeding into network." 
- ) - self.arg_input_group.add_argument( - '--confs_threshold', - type=ast.literal_eval, - default=0.6, - help="confidence threshold.") + help="resize the image to `shrink * original_shape` before feeding into network.") + self.arg_input_group.add_argument('--confs_threshold', + type=ast.literal_eval, + default=0.6, + help="confidence threshold.") diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md index ea00f9878..bb44d9c6a 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md @@ -164,7 +164,10 @@ 初始发布 -* 1.1.2 +* 1.1.3 + + 移除 fluid api + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.2 + $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.3 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md index 31d0758cb..2bad14e23 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md @@ -163,7 +163,10 @@ First release -* 1.1.2 +* 1.1.3 + + Remove fluid api + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.2 + $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.3 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py deleted file mode 100644 index 22c51db2b..000000000 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/face_detector.py +++ /dev/null @@ -1,1185 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def face_detector_320(): - _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _440 = 
fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _463 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.10000000149011612) - _465 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 4420, 2], name='_465', attr='_465', default_initializer=Constant(0.0)) - _467 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 4420, 2], name='_467', attr='_467', default_initializer=Constant(0.0)) - _470 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.20000000298023224) - _473 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 4420, 2], name='_473', attr='_473', default_initializer=Constant(0.0)) - _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _input = fluid.layers.data(dtype='float32', shape=[1, 3, 240, 320], name='_input', append_batch_size=False) - _325 = fluid.layers.assign(_322) - _326 = fluid.layers.assign(_323) - _339 = fluid.layers.assign(_336) - _340 = fluid.layers.assign(_337) - _371 = fluid.layers.assign(_368) - _372 = fluid.layers.assign(_369) - _385 = fluid.layers.assign(_382) - _386 = fluid.layers.assign(_383) - _411 = fluid.layers.assign(_408) - _412 = fluid.layers.assign(_409) - _425 = fluid.layers.assign(_422) - _426 = fluid.layers.assign(_423) - _443 = fluid.layers.assign(_440) - _444 = fluid.layers.assign(_441) - _455 = fluid.layers.assign(_452) - _456 = fluid.layers.assign(_453) - _245 = fluid.layers.conv2d( - _input, - num_filters=16, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_0_0_weight', - name='_245', - bias_attr=False) - _246 = fluid.layers.batch_norm( - _245, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_0_1_weight', - bias_attr='_base_net_0_1_bias', - moving_mean_name='_base_net_0_1_running_mean', - moving_variance_name='_base_net_0_1_running_var', - use_global_stats=False, - name='_246') - _247 = fluid.layers.relu(_246, name='_247') - _248 = fluid.layers.conv2d( - _247, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=16, - param_attr='_base_net_1_0_weight', - name='_248', - bias_attr=False) - _249 = fluid.layers.batch_norm( - _248, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_1_weight', - bias_attr='_base_net_1_1_bias', - moving_mean_name='_base_net_1_1_running_mean', - moving_variance_name='_base_net_1_1_running_var', - use_global_stats=False, - name='_249') - _250 = fluid.layers.relu(_249, name='_250') - _251 = fluid.layers.conv2d( - _250, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_1_3_weight', - name='_251', - bias_attr=False) - _252 = fluid.layers.batch_norm( - _251, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_4_weight', - bias_attr='_base_net_1_4_bias', - moving_mean_name='_base_net_1_4_running_mean', - moving_variance_name='_base_net_1_4_running_var', - 
use_global_stats=False, - name='_252') - _253 = fluid.layers.relu(_252, name='_253') - _254 = fluid.layers.conv2d( - _253, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_2_0_weight', - name='_254', - bias_attr=False) - _255 = fluid.layers.batch_norm( - _254, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_1_weight', - bias_attr='_base_net_2_1_bias', - moving_mean_name='_base_net_2_1_running_mean', - moving_variance_name='_base_net_2_1_running_var', - use_global_stats=False, - name='_255') - _256 = fluid.layers.relu(_255, name='_256') - _257 = fluid.layers.conv2d( - _256, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_2_3_weight', - name='_257', - bias_attr=False) - _258 = fluid.layers.batch_norm( - _257, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_4_weight', - bias_attr='_base_net_2_4_bias', - moving_mean_name='_base_net_2_4_running_mean', - moving_variance_name='_base_net_2_4_running_var', - use_global_stats=False, - name='_258') - _259 = fluid.layers.relu(_258, name='_259') - _260 = fluid.layers.conv2d( - _259, - num_filters=32, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_3_0_weight', - name='_260', - bias_attr=False) - _261 = fluid.layers.batch_norm( - _260, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_1_weight', - bias_attr='_base_net_3_1_bias', - moving_mean_name='_base_net_3_1_running_mean', - moving_variance_name='_base_net_3_1_running_var', - use_global_stats=False, - name='_261') - _262 = fluid.layers.relu(_261, name='_262') - _263 = fluid.layers.conv2d( - _262, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_3_3_weight', - name='_263', - bias_attr=False) - _264 = fluid.layers.batch_norm( - _263, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_4_weight', - bias_attr='_base_net_3_4_bias', - moving_mean_name='_base_net_3_4_running_mean', - moving_variance_name='_base_net_3_4_running_var', - use_global_stats=False, - name='_264') - _265 = fluid.layers.relu(_264, name='_265') - _266 = fluid.layers.conv2d( - _265, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_4_0_weight', - name='_266', - bias_attr=False) - _267 = fluid.layers.batch_norm( - _266, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_1_weight', - bias_attr='_base_net_4_1_bias', - moving_mean_name='_base_net_4_1_running_mean', - moving_variance_name='_base_net_4_1_running_var', - use_global_stats=False, - name='_267') - _268 = fluid.layers.relu(_267, name='_268') - _269 = fluid.layers.conv2d( - _268, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_4_3_weight', - name='_269', - bias_attr=False) - _270 = fluid.layers.batch_norm( - _269, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - 
param_attr='_base_net_4_4_weight', - bias_attr='_base_net_4_4_bias', - moving_mean_name='_base_net_4_4_running_mean', - moving_variance_name='_base_net_4_4_running_var', - use_global_stats=False, - name='_270') - _271 = fluid.layers.relu(_270, name='_271') - _272 = fluid.layers.conv2d( - _271, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_5_0_weight', - name='_272', - bias_attr=False) - _273 = fluid.layers.batch_norm( - _272, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_1_weight', - bias_attr='_base_net_5_1_bias', - moving_mean_name='_base_net_5_1_running_mean', - moving_variance_name='_base_net_5_1_running_var', - use_global_stats=False, - name='_273') - _274 = fluid.layers.relu(_273, name='_274') - _275 = fluid.layers.conv2d( - _274, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_5_3_weight', - name='_275', - bias_attr=False) - _276 = fluid.layers.batch_norm( - _275, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_4_weight', - bias_attr='_base_net_5_4_bias', - moving_mean_name='_base_net_5_4_running_mean', - moving_variance_name='_base_net_5_4_running_var', - use_global_stats=False, - name='_276') - _277 = fluid.layers.relu(_276, name='_277') - _278 = fluid.layers.conv2d( - _277, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_6_0_weight', - name='_278', - bias_attr=False) - _279 = fluid.layers.batch_norm( - _278, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_1_weight', - bias_attr='_base_net_6_1_bias', - moving_mean_name='_base_net_6_1_running_mean', - moving_variance_name='_base_net_6_1_running_var', - use_global_stats=False, - name='_279') - _280 = fluid.layers.relu(_279, name='_280') - _281 = fluid.layers.conv2d( - _280, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_6_3_weight', - name='_281', - bias_attr=False) - _282 = fluid.layers.batch_norm( - _281, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_4_weight', - bias_attr='_base_net_6_4_bias', - moving_mean_name='_base_net_6_4_running_mean', - moving_variance_name='_base_net_6_4_running_var', - use_global_stats=False, - name='_282') - _283 = fluid.layers.relu(_282, name='_283') - _284 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_0_conv_weight', - name='_284', - bias_attr=False) - _291 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_0_conv_weight', - name='_291', - bias_attr=False) - _298 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_0_conv_weight', - name='_298', - bias_attr=False) - _311 = fluid.layers.conv2d( - _283, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - 
groups=1, - param_attr='_base_net_7_shortcut_conv_weight', - name='_311', - bias_attr=False) - _285 = fluid.layers.batch_norm( - _284, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_0_bn_weight', - bias_attr='_base_net_7_branch0_0_bn_bias', - moving_mean_name='_base_net_7_branch0_0_bn_running_mean', - moving_variance_name='_base_net_7_branch0_0_bn_running_var', - use_global_stats=False, - name='_285') - _292 = fluid.layers.batch_norm( - _291, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_0_bn_weight', - bias_attr='_base_net_7_branch1_0_bn_bias', - moving_mean_name='_base_net_7_branch1_0_bn_running_mean', - moving_variance_name='_base_net_7_branch1_0_bn_running_var', - use_global_stats=False, - name='_292') - _299 = fluid.layers.batch_norm( - _298, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_0_bn_weight', - bias_attr='_base_net_7_branch2_0_bn_bias', - moving_mean_name='_base_net_7_branch2_0_bn_running_mean', - moving_variance_name='_base_net_7_branch2_0_bn_running_var', - use_global_stats=False, - name='_299') - _312 = fluid.layers.batch_norm( - _311, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_shortcut_bn_weight', - bias_attr='_base_net_7_shortcut_bn_bias', - moving_mean_name='_base_net_7_shortcut_bn_running_mean', - moving_variance_name='_base_net_7_shortcut_bn_running_var', - use_global_stats=False, - name='_312') - _286 = fluid.layers.conv2d( - _285, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_1_conv_weight', - name='_286', - bias_attr=False) - _293 = fluid.layers.conv2d( - _292, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_1_conv_weight', - name='_293', - bias_attr=False) - _300 = fluid.layers.conv2d( - _299, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_1_conv_weight', - name='_300', - bias_attr=False) - _287 = fluid.layers.batch_norm( - _286, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_1_bn_weight', - bias_attr='_base_net_7_branch0_1_bn_bias', - moving_mean_name='_base_net_7_branch0_1_bn_running_mean', - moving_variance_name='_base_net_7_branch0_1_bn_running_var', - use_global_stats=False, - name='_287') - _294 = fluid.layers.batch_norm( - _293, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_1_bn_weight', - bias_attr='_base_net_7_branch1_1_bn_bias', - moving_mean_name='_base_net_7_branch1_1_bn_running_mean', - moving_variance_name='_base_net_7_branch1_1_bn_running_var', - use_global_stats=False, - name='_294') - _301 = fluid.layers.batch_norm( - _300, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_1_bn_weight', - bias_attr='_base_net_7_branch2_1_bn_bias', - moving_mean_name='_base_net_7_branch2_1_bn_running_mean', - moving_variance_name='_base_net_7_branch2_1_bn_running_var', - 
use_global_stats=False, - name='_301') - _288 = fluid.layers.relu(_287, name='_288') - _295 = fluid.layers.relu(_294, name='_295') - _302 = fluid.layers.relu(_301, name='_302') - _289 = fluid.layers.conv2d( - _288, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[2, 2], - dilation=[2, 2], - groups=1, - param_attr='_base_net_7_branch0_2_conv_weight', - name='_289', - bias_attr=False) - _296 = fluid.layers.conv2d( - _295, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[3, 3], - dilation=[3, 3], - groups=1, - param_attr='_base_net_7_branch1_2_conv_weight', - name='_296', - bias_attr=False) - _303 = fluid.layers.conv2d( - _302, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_2_conv_weight', - name='_303', - bias_attr=False) - _290 = fluid.layers.batch_norm( - _289, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_2_bn_weight', - bias_attr='_base_net_7_branch0_2_bn_bias', - moving_mean_name='_base_net_7_branch0_2_bn_running_mean', - moving_variance_name='_base_net_7_branch0_2_bn_running_var', - use_global_stats=False, - name='_290') - _297 = fluid.layers.batch_norm( - _296, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_2_bn_weight', - bias_attr='_base_net_7_branch1_2_bn_bias', - moving_mean_name='_base_net_7_branch1_2_bn_running_mean', - moving_variance_name='_base_net_7_branch1_2_bn_running_var', - use_global_stats=False, - name='_297') - _304 = fluid.layers.batch_norm( - _303, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_2_bn_weight', - bias_attr='_base_net_7_branch2_2_bn_bias', - moving_mean_name='_base_net_7_branch2_2_bn_running_mean', - moving_variance_name='_base_net_7_branch2_2_bn_running_var', - use_global_stats=False, - name='_304') - _305 = fluid.layers.relu(_304, name='_305') - _306 = fluid.layers.conv2d( - _305, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[5, 5], - dilation=[5, 5], - groups=1, - param_attr='_base_net_7_branch2_3_conv_weight', - name='_306', - bias_attr=False) - _307 = fluid.layers.batch_norm( - _306, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_3_bn_weight', - bias_attr='_base_net_7_branch2_3_bn_bias', - moving_mean_name='_base_net_7_branch2_3_bn_running_mean', - moving_variance_name='_base_net_7_branch2_3_bn_running_var', - use_global_stats=False, - name='_307') - _308 = fluid.layers.concat([_290, _297, _307], axis=1) - _309 = fluid.layers.conv2d( - _308, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_ConvLinear_conv_weight', - name='_309', - bias_attr=False) - _310 = fluid.layers.batch_norm( - _309, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_ConvLinear_bn_weight', - bias_attr='_base_net_7_ConvLinear_bn_bias', - moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', - moving_variance_name='_base_net_7_ConvLinear_bn_running_var', - use_global_stats=False, - name='_310') - _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') - _314 = fluid.layers.relu(_313, name='_314') - _315 = 
fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_classification_headers_0_0_weight', - name='_315', - bias_attr='_classification_headers_0_0_bias') - _329 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_regression_headers_0_0_weight', - name='_329', - bias_attr='_regression_headers_0_0_bias') - _343 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_8_0_weight', - name='_343', - bias_attr=False) - _316 = fluid.layers.relu(_315, name='_316') - _330 = fluid.layers.relu(_329, name='_330') - _344 = fluid.layers.batch_norm( - _343, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_1_weight', - bias_attr='_base_net_8_1_bias', - moving_mean_name='_base_net_8_1_running_mean', - moving_variance_name='_base_net_8_1_running_var', - use_global_stats=False, - name='_344') - _317 = fluid.layers.conv2d( - _316, - num_filters=6, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_0_2_weight', - name='_317', - bias_attr='_classification_headers_0_2_bias') - _331 = fluid.layers.conv2d( - _330, - num_filters=12, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_0_2_weight', - name='_331', - bias_attr='_regression_headers_0_2_bias') - _345 = fluid.layers.relu(_344, name='_345') - _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') - _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') - _346 = fluid.layers.conv2d( - _345, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_8_3_weight', - name='_346', - bias_attr=False) - _320 = fluid.layers.shape(_318) - _334 = fluid.layers.shape(_332) - _347 = fluid.layers.batch_norm( - _346, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_4_weight', - bias_attr='_base_net_8_4_bias', - moving_mean_name='_base_net_8_4_running_mean', - moving_variance_name='_base_net_8_4_running_var', - use_global_stats=False, - name='_347') - _321 = fluid.layers.gather(input=_320, index=_319) - _335 = fluid.layers.gather(input=_334, index=_333) - _348 = fluid.layers.relu(_347, name='_348') - _324 = fluid.layers.assign(_321) - _338 = fluid.layers.assign(_335) - _349 = fluid.layers.conv2d( - _348, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_9_0_weight', - name='_349', - bias_attr=False) - _327 = fluid.layers.concat([_324, _325, _326], axis=0) - _341 = fluid.layers.concat([_338, _339, _340], axis=0) - _350 = fluid.layers.batch_norm( - _349, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_1_weight', - bias_attr='_base_net_9_1_bias', - moving_mean_name='_base_net_9_1_running_mean', - moving_variance_name='_base_net_9_1_running_var', - use_global_stats=False, - name='_350') - _327_cast = fluid.layers.cast(_327, dtype='int32') - _328 = fluid.layers.reshape(_318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) 
- _341_cast = fluid.layers.cast(_341, dtype='int32') - _342 = fluid.layers.reshape(_332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) - _351 = fluid.layers.relu(_350, name='_351') - _352 = fluid.layers.conv2d( - _351, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_9_3_weight', - name='_352', - bias_attr=False) - _353 = fluid.layers.batch_norm( - _352, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_4_weight', - bias_attr='_base_net_9_4_bias', - moving_mean_name='_base_net_9_4_running_mean', - moving_variance_name='_base_net_9_4_running_var', - use_global_stats=False, - name='_353') - _354 = fluid.layers.relu(_353, name='_354') - _355 = fluid.layers.conv2d( - _354, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_10_0_weight', - name='_355', - bias_attr=False) - _356 = fluid.layers.batch_norm( - _355, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_1_weight', - bias_attr='_base_net_10_1_bias', - moving_mean_name='_base_net_10_1_running_mean', - moving_variance_name='_base_net_10_1_running_var', - use_global_stats=False, - name='_356') - _357 = fluid.layers.relu(_356, name='_357') - _358 = fluid.layers.conv2d( - _357, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_10_3_weight', - name='_358', - bias_attr=False) - _359 = fluid.layers.batch_norm( - _358, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_4_weight', - bias_attr='_base_net_10_4_bias', - moving_mean_name='_base_net_10_4_running_mean', - moving_variance_name='_base_net_10_4_running_var', - use_global_stats=False, - name='_359') - _360 = fluid.layers.relu(_359, name='_360') - _361 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_classification_headers_1_0_weight', - name='_361', - bias_attr='_classification_headers_1_0_bias') - _375 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_regression_headers_1_0_weight', - name='_375', - bias_attr='_regression_headers_1_0_bias') - _389 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_11_0_weight', - name='_389', - bias_attr=False) - _362 = fluid.layers.relu(_361, name='_362') - _376 = fluid.layers.relu(_375, name='_376') - _390 = fluid.layers.batch_norm( - _389, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_1_weight', - bias_attr='_base_net_11_1_bias', - moving_mean_name='_base_net_11_1_running_mean', - moving_variance_name='_base_net_11_1_running_var', - use_global_stats=False, - name='_390') - _363 = fluid.layers.conv2d( - _362, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_1_2_weight', - name='_363', - bias_attr='_classification_headers_1_2_bias') - _377 = fluid.layers.conv2d( - _376, - 
num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_1_2_weight', - name='_377', - bias_attr='_regression_headers_1_2_bias') - _391 = fluid.layers.relu(_390, name='_391') - _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') - _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') - _392 = fluid.layers.conv2d( - _391, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_11_3_weight', - name='_392', - bias_attr=False) - _366 = fluid.layers.shape(_364) - _380 = fluid.layers.shape(_378) - _393 = fluid.layers.batch_norm( - _392, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_4_weight', - bias_attr='_base_net_11_4_bias', - moving_mean_name='_base_net_11_4_running_mean', - moving_variance_name='_base_net_11_4_running_var', - use_global_stats=False, - name='_393') - _367 = fluid.layers.gather(input=_366, index=_365) - _381 = fluid.layers.gather(input=_380, index=_379) - _394 = fluid.layers.relu(_393, name='_394') - _370 = fluid.layers.assign(_367) - _384 = fluid.layers.assign(_381) - _395 = fluid.layers.conv2d( - _394, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_base_net_12_0_weight', - name='_395', - bias_attr=False) - _373 = fluid.layers.concat([_370, _371, _372], axis=0) - _387 = fluid.layers.concat([_384, _385, _386], axis=0) - _396 = fluid.layers.batch_norm( - _395, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_1_weight', - bias_attr='_base_net_12_1_bias', - moving_mean_name='_base_net_12_1_running_mean', - moving_variance_name='_base_net_12_1_running_var', - use_global_stats=False, - name='_396') - _373_cast = fluid.layers.cast(_373, dtype='int32') - _374 = fluid.layers.reshape(_364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) - _387_cast = fluid.layers.cast(_387, dtype='int32') - _388 = fluid.layers.reshape(_378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) - _397 = fluid.layers.relu(_396, name='_397') - _398 = fluid.layers.conv2d( - _397, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_12_3_weight', - name='_398', - bias_attr=False) - _399 = fluid.layers.batch_norm( - _398, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_4_weight', - bias_attr='_base_net_12_4_bias', - moving_mean_name='_base_net_12_4_running_mean', - moving_variance_name='_base_net_12_4_running_var', - use_global_stats=False, - name='_399') - _400 = fluid.layers.relu(_399, name='_400') - _401 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_classification_headers_2_0_weight', - name='_401', - bias_attr='_classification_headers_2_0_bias') - _415 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_regression_headers_2_0_weight', - name='_415', - bias_attr='_regression_headers_2_0_bias') - _429 = fluid.layers.conv2d( - _400, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - 
dilation=[1, 1], - groups=1, - param_attr='_extras_0_0_weight', - name='_429', - bias_attr='_extras_0_0_bias') - _402 = fluid.layers.relu(_401, name='_402') - _416 = fluid.layers.relu(_415, name='_416') - _430 = fluid.layers.relu(_429, name='_430') - _403 = fluid.layers.conv2d( - _402, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_2_2_weight', - name='_403', - bias_attr='_classification_headers_2_2_bias') - _417 = fluid.layers.conv2d( - _416, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_2_2_weight', - name='_417', - bias_attr='_regression_headers_2_2_bias') - _431 = fluid.layers.conv2d( - _430, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_extras_0_2_0_weight', - name='_431', - bias_attr='_extras_0_2_0_bias') - _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') - _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') - _432 = fluid.layers.relu(_431, name='_432') - _406 = fluid.layers.shape(_404) - _420 = fluid.layers.shape(_418) - _433 = fluid.layers.conv2d( - _432, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_2_2_weight', - name='_433', - bias_attr='_extras_0_2_2_bias') - _407 = fluid.layers.gather(input=_406, index=_405) - _421 = fluid.layers.gather(input=_420, index=_419) - _434 = fluid.layers.relu(_433, name='_434') - _410 = fluid.layers.assign(_407) - _424 = fluid.layers.assign(_421) - _435 = fluid.layers.conv2d( - _434, - num_filters=6, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_3_weight', - name='_435', - bias_attr='_classification_headers_3_bias') - _447 = fluid.layers.conv2d( - _434, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_3_weight', - name='_447', - bias_attr='_regression_headers_3_bias') - _413 = fluid.layers.concat([_410, _411, _412], axis=0) - _427 = fluid.layers.concat([_424, _425, _426], axis=0) - _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') - _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') - _413_cast = fluid.layers.cast(_413, dtype='int32') - _414 = fluid.layers.reshape(_404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) - _427_cast = fluid.layers.cast(_427, dtype='int32') - _428 = fluid.layers.reshape(_418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) - _438 = fluid.layers.shape(_436) - _450 = fluid.layers.shape(_448) - _439 = fluid.layers.gather(input=_438, index=_437) - _451 = fluid.layers.gather(input=_450, index=_449) - _442 = fluid.layers.assign(_439) - _454 = fluid.layers.assign(_451) - _445 = fluid.layers.concat([_442, _443, _444], axis=0) - _457 = fluid.layers.concat([_454, _455, _456], axis=0) - _445_cast = fluid.layers.cast(_445, dtype='int32') - _446 = fluid.layers.reshape(_436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) - _457_cast = fluid.layers.cast(_457, dtype='int32') - _458 = fluid.layers.reshape(_448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) - _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) - _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) - _scores = fluid.layers.softmax(_459, axis=2, 
name='_scores') - _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) - _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) - _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') - _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') - _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') - _472 = fluid.layers.exp(_471, name='_472') - _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') - _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') - _475 = fluid.layers.concat([_468, _474], axis=2) - _476 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') - _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') - _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') - _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') - _boxes = fluid.layers.concat([_480, _485], axis=2) - return [_input], [_scores, _boxes] - - -def run_net(param_dir="./"): - import os - inputs, outputs = face_detector_320() - for i, out in enumerate(outputs): - if isinstance(out, list): - for out_part in out: - outputs.append(out_part) - del outputs[i] - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - def if_exist(var): - b = os.path.exists(os.path.join(param_dir, var.name)) - return b - - fluid.io.load_vars(exe, param_dir, fluid.default_main_program(), predicate=if_exist) diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py index 8155ae4db..6744e3571 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py @@ -2,18 +2,22 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from ultra_light_fast_generic_face_detector_1mb_320.processor import postprocess, base64_to_cv2 +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from ultra_light_fast_generic_face_detector_1mb_320.data_feed import reader +from ultra_light_fast_generic_face_detector_1mb_320.processor import base64_to_cv2 +from ultra_light_fast_generic_face_detector_1mb_320.processor import postprocess + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( @@ -23,8 +27,9 @@ author_email="paddle-dev@baidu.com", summary= "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance object detection model release on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.", - version="1.1.2") + version="1.1.3") class FaceDetector320(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "ultra_light_fast_generic_face_detector_1mb_320") @@ -34,10 +39,10 @@ def 
_set_config(self):
         """
         predictor config setting
         """
-        cpu_config = AnalysisConfig(self.default_pretrained_model_path)
+        cpu_config = Config(self.default_pretrained_model_path)
         cpu_config.disable_glog_info()
         cpu_config.disable_gpu()
-        self.cpu_predictor = create_paddle_predictor(cpu_config)
+        self.cpu_predictor = create_predictor(cpu_config)
         try:
             _places = os.environ["CUDA_VISIBLE_DEVICES"]
@@ -46,29 +51,28 @@ def _set_config(self):
         except:
             use_gpu = False
         if use_gpu:
-            gpu_config = AnalysisConfig(self.default_pretrained_model_path)
+            gpu_config = Config(self.default_pretrained_model_path)
             gpu_config.disable_glog_info()
             gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
-            self.gpu_predictor = create_paddle_predictor(gpu_config)
+            self.gpu_predictor = create_predictor(gpu_config)
     def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True):
         if combined:
             model_filename = "__model__" if not model_filename else model_filename
             params_filename = "__params__" if not params_filename else params_filename
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        place = paddle.CPUPlace()
+        exe = paddle.Executor(place)
-        program, feeded_var_names, target_vars = fluid.io.load_inference_model(
+        program, feeded_var_names, target_vars = paddle.static.load_inference_model(
             dirname=self.default_pretrained_model_path, executor=exe)
-        fluid.io.save_inference_model(
-            dirname=dirname,
-            main_program=program,
-            executor=exe,
-            feeded_var_names=feeded_var_names,
-            target_vars=target_vars,
-            model_filename=model_filename,
-            params_filename=params_filename)
+        paddle.static.save_inference_model(dirname=dirname,
+                                           main_program=program,
+                                           executor=exe,
+                                           feeded_var_names=feeded_var_names,
+                                           target_vars=target_vars,
+                                           model_filename=model_filename,
+                                           params_filename=params_filename)
     def face_detection(self,
                        images=None,
@@ -129,24 +133,31 @@ def face_detection(self,
             except:
                 pass
             # feed batch image
-            batch_image = np.array([data['image'] for data in batch_data])
-            batch_image = PaddleTensor(batch_image.astype('float32'))
-            data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image])
-            confidences = data_out[0].as_ndarray()
-            boxes = data_out[1].as_ndarray()
+            batch_image = np.array([data['image'] for data in batch_data]).astype('float32')
+            predictor = self.gpu_predictor if use_gpu else self.cpu_predictor
+            input_names = predictor.get_input_names()
+            input_handle = predictor.get_input_handle(input_names[0])
+            input_handle.copy_from_cpu(batch_image)
+
+            predictor.run()
+            output_names = predictor.get_output_names()
+            output_handle = predictor.get_output_handle(output_names[0])
+            confidences = output_handle.copy_to_cpu()
+
+            output_handle = predictor.get_output_handle(output_names[1])
+            boxes = output_handle.copy_to_cpu()
             # postprocess one by one
            for i in range(len(batch_data)):
-                out = postprocess(
-                    confidences=confidences[i],
-                    boxes=boxes[i],
-                    orig_im=batch_data[i]['orig_im'],
-                    orig_im_shape=batch_data[i]['orig_im_shape'],
-                    orig_im_path=batch_data[i]['orig_im_path'],
-                    output_dir=output_dir,
-                    visualization=visualization,
-                    confs_threshold=confs_threshold,
-                    iou_threshold=iou_threshold)
+                out = postprocess(confidences=confidences[i],
+                                  boxes=boxes[i],
+                                  orig_im=batch_data[i]['orig_im'],
+                                  orig_im_shape=batch_data[i]['orig_im_shape'],
+                                  orig_im_path=batch_data[i]['orig_im_path'],
+                                  output_dir=output_dir,
+                                  visualization=visualization,
+                                  confs_threshold=confs_threshold,
+                                  iou_threshold=iou_threshold)
                 res.append(out)
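The hunks above replace the old AnalysisConfig/PaddleTensor flow with the handle-based paddle.inference API. For reference, a minimal standalone sketch of that flow is shown below; the model directory and the dummy input shape are illustrative placeholders, not values taken from this module:

```python
# Sketch of the paddle.inference flow adopted above (illustrative only).
import numpy as np
from paddle.inference import Config, create_predictor

config = Config("face_detector_320_model_dir")  # placeholder: directory holding the exported model
config.disable_glog_info()
config.disable_gpu()  # or: config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
predictor = create_predictor(config)

# Copy a batch into the first input handle, run, then fetch both outputs.
batch_image = np.zeros((1, 3, 240, 320), dtype="float32")  # placeholder NCHW batch
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(batch_image)
predictor.run()

output_names = predictor.get_output_names()
confidences = predictor.get_output_handle(output_names[0]).copy_to_cpu()
boxes = predictor.get_output_handle(output_names[1]).copy_to_cpu()
```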
return res @@ -164,38 +175,39 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) + results = self.face_detection(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='face_detector_320_predict_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='face_detector_320_predict_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") def add_module_input_arg(self): diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md index afa5ad3e3..b7b36aa78 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md @@ -164,7 +164,10 @@ 初始发布 -* 1.1.2 +* 1.1.3 + + 移除 fluid api + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.2 + $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.3 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md index 6eeed2c2c..e200bb397 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md @@ -163,7 +163,10 @@ First release -* 1.1.2 +* 1.1.3 + + Remove fluid api + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.2 + $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.3 ``` diff --git 
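Both README changelogs above now point at 1.1.3 and note the fluid removal. After `hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.3`, a quick sanity check could look like the sketch below, assuming the 640 module exposes the same face_detection signature as the 320 module shown earlier; the image path is a placeholder:

```python
# Illustrative usage check; "/PATH/TO/IMAGE.jpg" is a placeholder path.
import paddlehub as hub

detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640")
results = detector.face_detection(paths=["/PATH/TO/IMAGE.jpg"],
                                  use_gpu=False,
                                  visualization=True)
print(results)
```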
a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py deleted file mode 100644 index d1b6d894e..000000000 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/face_detector.py +++ /dev/null @@ -1,1186 +0,0 @@ -# coding=utf-8 -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def face_detector(): - _319 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _322 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _323 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _333 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _336 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _337 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _365 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _368 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _369 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _379 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _382 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _383 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _405 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _408 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _409 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _419 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _422 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _423 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _437 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _440 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _441 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=2) - _449 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=0) - _452 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=-1) - _453 = fluid.layers.fill_constant(shape=[1], dtype='int32', value=4) - _463 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.10000000149011612) - _465 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 17640, 2], name='_465', attr='_465', default_initializer=Constant(0.0)) - _467 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 17640, 2], name='_467', attr='_467', default_initializer=Constant(0.0)) - _470 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.20000000298023224) - _473 = fluid.layers.create_parameter( - dtype='float32', shape=[1, 17640, 2], name='_473', attr='_473', default_initializer=Constant(0.0)) - _478 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _483 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0) - _input = fluid.layers.data(dtype='float32', shape=[1, 3, 480, 640], name='_input', append_batch_size=False) - _325 = fluid.layers.assign(_322) - _326 = fluid.layers.assign(_323) - _339 = fluid.layers.assign(_336) - _340 = fluid.layers.assign(_337) - _371 = fluid.layers.assign(_368) - _372 = fluid.layers.assign(_369) - _385 = fluid.layers.assign(_382) - _386 = fluid.layers.assign(_383) - _411 = fluid.layers.assign(_408) - _412 = fluid.layers.assign(_409) - _425 = fluid.layers.assign(_422) - _426 = fluid.layers.assign(_423) - _443 = 
fluid.layers.assign(_440) - _444 = fluid.layers.assign(_441) - _455 = fluid.layers.assign(_452) - _456 = fluid.layers.assign(_453) - _245 = fluid.layers.conv2d( - _input, - num_filters=16, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_0_0_weight', - name='_245', - bias_attr=False) - _246 = fluid.layers.batch_norm( - _245, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_0_1_weight', - bias_attr='_base_net_0_1_bias', - moving_mean_name='_base_net_0_1_running_mean', - moving_variance_name='_base_net_0_1_running_var', - use_global_stats=False, - name='_246') - _247 = fluid.layers.relu(_246, name='_247') - _248 = fluid.layers.conv2d( - _247, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=16, - param_attr='_base_net_1_0_weight', - name='_248', - bias_attr=False) - _249 = fluid.layers.batch_norm( - _248, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_1_weight', - bias_attr='_base_net_1_1_bias', - moving_mean_name='_base_net_1_1_running_mean', - moving_variance_name='_base_net_1_1_running_var', - use_global_stats=False, - name='_249') - _250 = fluid.layers.relu(_249, name='_250') - _251 = fluid.layers.conv2d( - _250, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_1_3_weight', - name='_251', - bias_attr=False) - _252 = fluid.layers.batch_norm( - _251, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_1_4_weight', - bias_attr='_base_net_1_4_bias', - moving_mean_name='_base_net_1_4_running_mean', - moving_variance_name='_base_net_1_4_running_var', - use_global_stats=False, - name='_252') - _253 = fluid.layers.relu(_252, name='_253') - _254 = fluid.layers.conv2d( - _253, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_2_0_weight', - name='_254', - bias_attr=False) - _255 = fluid.layers.batch_norm( - _254, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_1_weight', - bias_attr='_base_net_2_1_bias', - moving_mean_name='_base_net_2_1_running_mean', - moving_variance_name='_base_net_2_1_running_var', - use_global_stats=False, - name='_255') - _256 = fluid.layers.relu(_255, name='_256') - _257 = fluid.layers.conv2d( - _256, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_2_3_weight', - name='_257', - bias_attr=False) - _258 = fluid.layers.batch_norm( - _257, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_2_4_weight', - bias_attr='_base_net_2_4_bias', - moving_mean_name='_base_net_2_4_running_mean', - moving_variance_name='_base_net_2_4_running_var', - use_global_stats=False, - name='_258') - _259 = fluid.layers.relu(_258, name='_259') - _260 = fluid.layers.conv2d( - _259, - num_filters=32, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_3_0_weight', - name='_260', - bias_attr=False) - _261 = fluid.layers.batch_norm( - _260, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - 
data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_1_weight', - bias_attr='_base_net_3_1_bias', - moving_mean_name='_base_net_3_1_running_mean', - moving_variance_name='_base_net_3_1_running_var', - use_global_stats=False, - name='_261') - _262 = fluid.layers.relu(_261, name='_262') - _263 = fluid.layers.conv2d( - _262, - num_filters=32, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_3_3_weight', - name='_263', - bias_attr=False) - _264 = fluid.layers.batch_norm( - _263, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_3_4_weight', - bias_attr='_base_net_3_4_bias', - moving_mean_name='_base_net_3_4_running_mean', - moving_variance_name='_base_net_3_4_running_var', - use_global_stats=False, - name='_264') - _265 = fluid.layers.relu(_264, name='_265') - _266 = fluid.layers.conv2d( - _265, - num_filters=32, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=32, - param_attr='_base_net_4_0_weight', - name='_266', - bias_attr=False) - _267 = fluid.layers.batch_norm( - _266, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_1_weight', - bias_attr='_base_net_4_1_bias', - moving_mean_name='_base_net_4_1_running_mean', - moving_variance_name='_base_net_4_1_running_var', - use_global_stats=False, - name='_267') - _268 = fluid.layers.relu(_267, name='_268') - _269 = fluid.layers.conv2d( - _268, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_4_3_weight', - name='_269', - bias_attr=False) - _270 = fluid.layers.batch_norm( - _269, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_4_4_weight', - bias_attr='_base_net_4_4_bias', - moving_mean_name='_base_net_4_4_running_mean', - moving_variance_name='_base_net_4_4_running_var', - use_global_stats=False, - name='_270') - _271 = fluid.layers.relu(_270, name='_271') - _272 = fluid.layers.conv2d( - _271, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_5_0_weight', - name='_272', - bias_attr=False) - _273 = fluid.layers.batch_norm( - _272, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_1_weight', - bias_attr='_base_net_5_1_bias', - moving_mean_name='_base_net_5_1_running_mean', - moving_variance_name='_base_net_5_1_running_var', - use_global_stats=False, - name='_273') - _274 = fluid.layers.relu(_273, name='_274') - _275 = fluid.layers.conv2d( - _274, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_5_3_weight', - name='_275', - bias_attr=False) - _276 = fluid.layers.batch_norm( - _275, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_5_4_weight', - bias_attr='_base_net_5_4_bias', - moving_mean_name='_base_net_5_4_running_mean', - moving_variance_name='_base_net_5_4_running_var', - use_global_stats=False, - name='_276') - _277 = fluid.layers.relu(_276, name='_277') - _278 = fluid.layers.conv2d( - _277, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - 
param_attr='_base_net_6_0_weight', - name='_278', - bias_attr=False) - _279 = fluid.layers.batch_norm( - _278, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_1_weight', - bias_attr='_base_net_6_1_bias', - moving_mean_name='_base_net_6_1_running_mean', - moving_variance_name='_base_net_6_1_running_var', - use_global_stats=False, - name='_279') - _280 = fluid.layers.relu(_279, name='_280') - _281 = fluid.layers.conv2d( - _280, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_6_3_weight', - name='_281', - bias_attr=False) - _282 = fluid.layers.batch_norm( - _281, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_6_4_weight', - bias_attr='_base_net_6_4_bias', - moving_mean_name='_base_net_6_4_running_mean', - moving_variance_name='_base_net_6_4_running_var', - use_global_stats=False, - name='_282') - _283 = fluid.layers.relu(_282, name='_283') - _284 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_0_conv_weight', - name='_284', - bias_attr=False) - _291 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_0_conv_weight', - name='_291', - bias_attr=False) - _298 = fluid.layers.conv2d( - _283, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_0_conv_weight', - name='_298', - bias_attr=False) - _311 = fluid.layers.conv2d( - _283, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_shortcut_conv_weight', - name='_311', - bias_attr=False) - _285 = fluid.layers.batch_norm( - _284, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_0_bn_weight', - bias_attr='_base_net_7_branch0_0_bn_bias', - moving_mean_name='_base_net_7_branch0_0_bn_running_mean', - moving_variance_name='_base_net_7_branch0_0_bn_running_var', - use_global_stats=False, - name='_285') - _292 = fluid.layers.batch_norm( - _291, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_0_bn_weight', - bias_attr='_base_net_7_branch1_0_bn_bias', - moving_mean_name='_base_net_7_branch1_0_bn_running_mean', - moving_variance_name='_base_net_7_branch1_0_bn_running_var', - use_global_stats=False, - name='_292') - _299 = fluid.layers.batch_norm( - _298, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_0_bn_weight', - bias_attr='_base_net_7_branch2_0_bn_bias', - moving_mean_name='_base_net_7_branch2_0_bn_running_mean', - moving_variance_name='_base_net_7_branch2_0_bn_running_var', - use_global_stats=False, - name='_299') - _312 = fluid.layers.batch_norm( - _311, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_shortcut_bn_weight', - bias_attr='_base_net_7_shortcut_bn_bias', - moving_mean_name='_base_net_7_shortcut_bn_running_mean', - 
moving_variance_name='_base_net_7_shortcut_bn_running_var', - use_global_stats=False, - name='_312') - _286 = fluid.layers.conv2d( - _285, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch0_1_conv_weight', - name='_286', - bias_attr=False) - _293 = fluid.layers.conv2d( - _292, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch1_1_conv_weight', - name='_293', - bias_attr=False) - _300 = fluid.layers.conv2d( - _299, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_1_conv_weight', - name='_300', - bias_attr=False) - _287 = fluid.layers.batch_norm( - _286, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_1_bn_weight', - bias_attr='_base_net_7_branch0_1_bn_bias', - moving_mean_name='_base_net_7_branch0_1_bn_running_mean', - moving_variance_name='_base_net_7_branch0_1_bn_running_var', - use_global_stats=False, - name='_287') - _294 = fluid.layers.batch_norm( - _293, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_1_bn_weight', - bias_attr='_base_net_7_branch1_1_bn_bias', - moving_mean_name='_base_net_7_branch1_1_bn_running_mean', - moving_variance_name='_base_net_7_branch1_1_bn_running_var', - use_global_stats=False, - name='_294') - _301 = fluid.layers.batch_norm( - _300, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_1_bn_weight', - bias_attr='_base_net_7_branch2_1_bn_bias', - moving_mean_name='_base_net_7_branch2_1_bn_running_mean', - moving_variance_name='_base_net_7_branch2_1_bn_running_var', - use_global_stats=False, - name='_301') - _288 = fluid.layers.relu(_287, name='_288') - _295 = fluid.layers.relu(_294, name='_295') - _302 = fluid.layers.relu(_301, name='_302') - _289 = fluid.layers.conv2d( - _288, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[2, 2], - dilation=[2, 2], - groups=1, - param_attr='_base_net_7_branch0_2_conv_weight', - name='_289', - bias_attr=False) - _296 = fluid.layers.conv2d( - _295, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[3, 3], - dilation=[3, 3], - groups=1, - param_attr='_base_net_7_branch1_2_conv_weight', - name='_296', - bias_attr=False) - _303 = fluid.layers.conv2d( - _302, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_branch2_2_conv_weight', - name='_303', - bias_attr=False) - _290 = fluid.layers.batch_norm( - _289, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch0_2_bn_weight', - bias_attr='_base_net_7_branch0_2_bn_bias', - moving_mean_name='_base_net_7_branch0_2_bn_running_mean', - moving_variance_name='_base_net_7_branch0_2_bn_running_var', - use_global_stats=False, - name='_290') - _297 = fluid.layers.batch_norm( - _296, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch1_2_bn_weight', - bias_attr='_base_net_7_branch1_2_bn_bias', - moving_mean_name='_base_net_7_branch1_2_bn_running_mean', - 
moving_variance_name='_base_net_7_branch1_2_bn_running_var', - use_global_stats=False, - name='_297') - _304 = fluid.layers.batch_norm( - _303, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_2_bn_weight', - bias_attr='_base_net_7_branch2_2_bn_bias', - moving_mean_name='_base_net_7_branch2_2_bn_running_mean', - moving_variance_name='_base_net_7_branch2_2_bn_running_var', - use_global_stats=False, - name='_304') - _305 = fluid.layers.relu(_304, name='_305') - _306 = fluid.layers.conv2d( - _305, - num_filters=16, - filter_size=[3, 3], - stride=[1, 1], - padding=[5, 5], - dilation=[5, 5], - groups=1, - param_attr='_base_net_7_branch2_3_conv_weight', - name='_306', - bias_attr=False) - _307 = fluid.layers.batch_norm( - _306, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_branch2_3_bn_weight', - bias_attr='_base_net_7_branch2_3_bn_bias', - moving_mean_name='_base_net_7_branch2_3_bn_running_mean', - moving_variance_name='_base_net_7_branch2_3_bn_running_var', - use_global_stats=False, - name='_307') - _308 = fluid.layers.concat([_290, _297, _307], axis=1) - _309 = fluid.layers.conv2d( - _308, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_7_ConvLinear_conv_weight', - name='_309', - bias_attr=False) - _310 = fluid.layers.batch_norm( - _309, - momentum=0.9900000095367432, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_7_ConvLinear_bn_weight', - bias_attr='_base_net_7_ConvLinear_bn_bias', - moving_mean_name='_base_net_7_ConvLinear_bn_running_mean', - moving_variance_name='_base_net_7_ConvLinear_bn_running_var', - use_global_stats=False, - name='_310') - _313 = fluid.layers.elementwise_add(x=_310, y=_312, name='_313') - _314 = fluid.layers.relu(_313, name='_314') - _315 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_classification_headers_0_0_weight', - name='_315', - bias_attr='_classification_headers_0_0_bias') - _329 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_regression_headers_0_0_weight', - name='_329', - bias_attr='_regression_headers_0_0_bias') - _343 = fluid.layers.conv2d( - _314, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_base_net_8_0_weight', - name='_343', - bias_attr=False) - _316 = fluid.layers.relu(_315, name='_316') - _330 = fluid.layers.relu(_329, name='_330') - _344 = fluid.layers.batch_norm( - _343, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_1_weight', - bias_attr='_base_net_8_1_bias', - moving_mean_name='_base_net_8_1_running_mean', - moving_variance_name='_base_net_8_1_running_var', - use_global_stats=False, - name='_344') - _317 = fluid.layers.conv2d( - _316, - num_filters=6, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_0_2_weight', - name='_317', - bias_attr='_classification_headers_0_2_bias') - _331 = fluid.layers.conv2d( - _330, - num_filters=12, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - 
groups=1, - param_attr='_regression_headers_0_2_weight', - name='_331', - bias_attr='_regression_headers_0_2_bias') - _345 = fluid.layers.relu(_344, name='_345') - _318 = fluid.layers.transpose(_317, perm=[0, 2, 3, 1], name='_318') - _332 = fluid.layers.transpose(_331, perm=[0, 2, 3, 1], name='_332') - _346 = fluid.layers.conv2d( - _345, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_8_3_weight', - name='_346', - bias_attr=False) - _320 = fluid.layers.shape(_318) - _334 = fluid.layers.shape(_332) - _347 = fluid.layers.batch_norm( - _346, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_8_4_weight', - bias_attr='_base_net_8_4_bias', - moving_mean_name='_base_net_8_4_running_mean', - moving_variance_name='_base_net_8_4_running_var', - use_global_stats=False, - name='_347') - _321 = fluid.layers.gather(input=_320, index=_319) - _335 = fluid.layers.gather(input=_334, index=_333) - _348 = fluid.layers.relu(_347, name='_348') - _324 = fluid.layers.assign(_321) - _338 = fluid.layers.assign(_335) - _349 = fluid.layers.conv2d( - _348, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_9_0_weight', - name='_349', - bias_attr=False) - _327 = fluid.layers.concat([_324, _325, _326], axis=0) - _341 = fluid.layers.concat([_338, _339, _340], axis=0) - _350 = fluid.layers.batch_norm( - _349, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_1_weight', - bias_attr='_base_net_9_1_bias', - moving_mean_name='_base_net_9_1_running_mean', - moving_variance_name='_base_net_9_1_running_var', - use_global_stats=False, - name='_350') - _327_cast = fluid.layers.cast(_327, dtype='int32') - _328 = fluid.layers.reshape(_318, name='_328', actual_shape=_327_cast, shape=[1, -1, 2]) - _341_cast = fluid.layers.cast(_341, dtype='int32') - _342 = fluid.layers.reshape(_332, name='_342', actual_shape=_341_cast, shape=[1, -1, 4]) - _351 = fluid.layers.relu(_350, name='_351') - _352 = fluid.layers.conv2d( - _351, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_9_3_weight', - name='_352', - bias_attr=False) - _353 = fluid.layers.batch_norm( - _352, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_9_4_weight', - bias_attr='_base_net_9_4_bias', - moving_mean_name='_base_net_9_4_running_mean', - moving_variance_name='_base_net_9_4_running_var', - use_global_stats=False, - name='_353') - _354 = fluid.layers.relu(_353, name='_354') - _355 = fluid.layers.conv2d( - _354, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_10_0_weight', - name='_355', - bias_attr=False) - _356 = fluid.layers.batch_norm( - _355, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_1_weight', - bias_attr='_base_net_10_1_bias', - moving_mean_name='_base_net_10_1_running_mean', - moving_variance_name='_base_net_10_1_running_var', - use_global_stats=False, - name='_356') - _357 = fluid.layers.relu(_356, name='_357') - _358 = fluid.layers.conv2d( - _357, - num_filters=128, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - 
dilation=[1, 1], - groups=1, - param_attr='_base_net_10_3_weight', - name='_358', - bias_attr=False) - _359 = fluid.layers.batch_norm( - _358, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_10_4_weight', - bias_attr='_base_net_10_4_bias', - moving_mean_name='_base_net_10_4_running_mean', - moving_variance_name='_base_net_10_4_running_var', - use_global_stats=False, - name='_359') - _360 = fluid.layers.relu(_359, name='_360') - _361 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_classification_headers_1_0_weight', - name='_361', - bias_attr='_classification_headers_1_0_bias') - _375 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_regression_headers_1_0_weight', - name='_375', - bias_attr='_regression_headers_1_0_bias') - _389 = fluid.layers.conv2d( - _360, - num_filters=128, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=128, - param_attr='_base_net_11_0_weight', - name='_389', - bias_attr=False) - _362 = fluid.layers.relu(_361, name='_362') - _376 = fluid.layers.relu(_375, name='_376') - _390 = fluid.layers.batch_norm( - _389, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_1_weight', - bias_attr='_base_net_11_1_bias', - moving_mean_name='_base_net_11_1_running_mean', - moving_variance_name='_base_net_11_1_running_var', - use_global_stats=False, - name='_390') - _363 = fluid.layers.conv2d( - _362, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_1_2_weight', - name='_363', - bias_attr='_classification_headers_1_2_bias') - _377 = fluid.layers.conv2d( - _376, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_1_2_weight', - name='_377', - bias_attr='_regression_headers_1_2_bias') - _391 = fluid.layers.relu(_390, name='_391') - _364 = fluid.layers.transpose(_363, perm=[0, 2, 3, 1], name='_364') - _378 = fluid.layers.transpose(_377, perm=[0, 2, 3, 1], name='_378') - _392 = fluid.layers.conv2d( - _391, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_11_3_weight', - name='_392', - bias_attr=False) - _366 = fluid.layers.shape(_364) - _380 = fluid.layers.shape(_378) - _393 = fluid.layers.batch_norm( - _392, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_11_4_weight', - bias_attr='_base_net_11_4_bias', - moving_mean_name='_base_net_11_4_running_mean', - moving_variance_name='_base_net_11_4_running_var', - use_global_stats=False, - name='_393') - _367 = fluid.layers.gather(input=_366, index=_365) - _381 = fluid.layers.gather(input=_380, index=_379) - _394 = fluid.layers.relu(_393, name='_394') - _370 = fluid.layers.assign(_367) - _384 = fluid.layers.assign(_381) - _395 = fluid.layers.conv2d( - _394, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_base_net_12_0_weight', - name='_395', - bias_attr=False) - _373 = fluid.layers.concat([_370, _371, _372], axis=0) - _387 = 
fluid.layers.concat([_384, _385, _386], axis=0) - _396 = fluid.layers.batch_norm( - _395, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_1_weight', - bias_attr='_base_net_12_1_bias', - moving_mean_name='_base_net_12_1_running_mean', - moving_variance_name='_base_net_12_1_running_var', - use_global_stats=False, - name='_396') - _373_cast = fluid.layers.cast(_373, dtype='int32') - _374 = fluid.layers.reshape(_364, name='_374', actual_shape=_373_cast, shape=[1, -1, 2]) - _387_cast = fluid.layers.cast(_387, dtype='int32') - _388 = fluid.layers.reshape(_378, name='_388', actual_shape=_387_cast, shape=[1, -1, 4]) - _397 = fluid.layers.relu(_396, name='_397') - _398 = fluid.layers.conv2d( - _397, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_base_net_12_3_weight', - name='_398', - bias_attr=False) - _399 = fluid.layers.batch_norm( - _398, - momentum=0.8999999761581421, - epsilon=9.999999747378752e-06, - data_layout='NCHW', - is_test=True, - param_attr='_base_net_12_4_weight', - bias_attr='_base_net_12_4_bias', - moving_mean_name='_base_net_12_4_running_mean', - moving_variance_name='_base_net_12_4_running_var', - use_global_stats=False, - name='_399') - _400 = fluid.layers.relu(_399, name='_400') - _401 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_classification_headers_2_0_weight', - name='_401', - bias_attr='_classification_headers_2_0_bias') - _415 = fluid.layers.conv2d( - _400, - num_filters=256, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=256, - param_attr='_regression_headers_2_0_weight', - name='_415', - bias_attr='_regression_headers_2_0_bias') - _429 = fluid.layers.conv2d( - _400, - num_filters=64, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_0_weight', - name='_429', - bias_attr='_extras_0_0_bias') - _402 = fluid.layers.relu(_401, name='_402') - _416 = fluid.layers.relu(_415, name='_416') - _430 = fluid.layers.relu(_429, name='_430') - _403 = fluid.layers.conv2d( - _402, - num_filters=4, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_2_2_weight', - name='_403', - bias_attr='_classification_headers_2_2_bias') - _417 = fluid.layers.conv2d( - _416, - num_filters=8, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_2_2_weight', - name='_417', - bias_attr='_regression_headers_2_2_bias') - _431 = fluid.layers.conv2d( - _430, - num_filters=64, - filter_size=[3, 3], - stride=[2, 2], - padding=[1, 1], - dilation=[1, 1], - groups=64, - param_attr='_extras_0_2_0_weight', - name='_431', - bias_attr='_extras_0_2_0_bias') - _404 = fluid.layers.transpose(_403, perm=[0, 2, 3, 1], name='_404') - _418 = fluid.layers.transpose(_417, perm=[0, 2, 3, 1], name='_418') - _432 = fluid.layers.relu(_431, name='_432') - _406 = fluid.layers.shape(_404) - _420 = fluid.layers.shape(_418) - _433 = fluid.layers.conv2d( - _432, - num_filters=256, - filter_size=[1, 1], - stride=[1, 1], - padding=[0, 0], - dilation=[1, 1], - groups=1, - param_attr='_extras_0_2_2_weight', - name='_433', - bias_attr='_extras_0_2_2_bias') - _407 = fluid.layers.gather(input=_406, index=_405) - _421 = 
fluid.layers.gather(input=_420, index=_419) - _434 = fluid.layers.relu(_433, name='_434') - _410 = fluid.layers.assign(_407) - _424 = fluid.layers.assign(_421) - _435 = fluid.layers.conv2d( - _434, - num_filters=6, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_classification_headers_3_weight', - name='_435', - bias_attr='_classification_headers_3_bias') - _447 = fluid.layers.conv2d( - _434, - num_filters=12, - filter_size=[3, 3], - stride=[1, 1], - padding=[1, 1], - dilation=[1, 1], - groups=1, - param_attr='_regression_headers_3_weight', - name='_447', - bias_attr='_regression_headers_3_bias') - _413 = fluid.layers.concat([_410, _411, _412], axis=0) - _427 = fluid.layers.concat([_424, _425, _426], axis=0) - _436 = fluid.layers.transpose(_435, perm=[0, 2, 3, 1], name='_436') - _448 = fluid.layers.transpose(_447, perm=[0, 2, 3, 1], name='_448') - _413_cast = fluid.layers.cast(_413, dtype='int32') - _414 = fluid.layers.reshape(_404, name='_414', actual_shape=_413_cast, shape=[1, -1, 2]) - _427_cast = fluid.layers.cast(_427, dtype='int32') - _428 = fluid.layers.reshape(_418, name='_428', actual_shape=_427_cast, shape=[1, -1, 4]) - _438 = fluid.layers.shape(_436) - _450 = fluid.layers.shape(_448) - _439 = fluid.layers.gather(input=_438, index=_437) - _451 = fluid.layers.gather(input=_450, index=_449) - _442 = fluid.layers.assign(_439) - _454 = fluid.layers.assign(_451) - _445 = fluid.layers.concat([_442, _443, _444], axis=0) - _457 = fluid.layers.concat([_454, _455, _456], axis=0) - _445_cast = fluid.layers.cast(_445, dtype='int32') - _446 = fluid.layers.reshape(_436, name='_446', actual_shape=_445_cast, shape=[1, -1, 2]) - _457_cast = fluid.layers.cast(_457, dtype='int32') - _458 = fluid.layers.reshape(_448, name='_458', actual_shape=_457_cast, shape=[1, -1, 4]) - _459 = fluid.layers.concat([_328, _374, _414, _446], axis=1) - _460 = fluid.layers.concat([_342, _388, _428, _458], axis=1) - _scores = fluid.layers.softmax(_459, axis=2, name='_scores') - _462 = fluid.layers.slice(_460, axes=[2], starts=[0], ends=[2]) - _469 = fluid.layers.slice(_460, axes=[2], starts=[2], ends=[4]) - _464 = fluid.layers.elementwise_mul(x=_462, y=_463, name='_464') - _471 = fluid.layers.elementwise_mul(x=_469, y=_470, name='_471') - _466 = fluid.layers.elementwise_mul(x=_464, y=_465, name='_466') - _472 = fluid.layers.exp(_471, name='_472') - _468 = fluid.layers.elementwise_add(x=_466, y=_467, name='_468') - _474 = fluid.layers.elementwise_mul(x=_472, y=_473, name='_474') - _475 = fluid.layers.concat([_468, _474], axis=2) - _476 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _477 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _481 = fluid.layers.slice(_475, axes=[2], starts=[0], ends=[2]) - _482 = fluid.layers.slice(_475, axes=[2], starts=[2], ends=[4]) - _479 = fluid.layers.elementwise_div(x=_477, y=_478, name='_479') - _484 = fluid.layers.elementwise_div(x=_482, y=_483, name='_484') - _480 = fluid.layers.elementwise_sub(x=_476, y=_479, name='_480') - _485 = fluid.layers.elementwise_add(x=_481, y=_484, name='_485') - _boxes = fluid.layers.concat([_480, _485], axis=2) - - return [_input], [_scores, _boxes] - - -def run_net(param_dir="./"): - import os - inputs, outputs = face_detector() - for i, out in enumerate(outputs): - if isinstance(out, list): - for out_part in out: - outputs.append(out_part) - del outputs[i] - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(fluid.default_startup_program()) - - def if_exist(var): - b = 
os.path.exists(os.path.join(param_dir, var.name)) - return b - - fluid.io.load_vars(exe, param_dir, fluid.default_main_program(), predicate=if_exist) diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py index b2f104578..6caa32ace 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py @@ -2,18 +2,22 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse +import ast import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from ultra_light_fast_generic_face_detector_1mb_640.processor import postprocess, base64_to_cv2 +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor from ultra_light_fast_generic_face_detector_1mb_640.data_feed import reader +from ultra_light_fast_generic_face_detector_1mb_640.processor import base64_to_cv2 +from ultra_light_fast_generic_face_detector_1mb_640.processor import postprocess + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( @@ -23,8 +27,9 @@ author_email="paddle-dev@baidu.com", summary= "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance object detection model release on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.", - version="1.1.2") + version="1.1.3") class FaceDetector640(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "ultra_light_fast_generic_face_detector_1mb_640") @@ -34,10 +39,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -46,29 +51,28 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - 
target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) def face_detection(self, images=None, @@ -128,24 +132,31 @@ def face_detection(self, except: pass # feed batch image - batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.astype('float32')) - data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) - confidences = data_out[0].as_ndarray() - boxes = data_out[1].as_ndarray() + batch_image = np.array([data['image'] for data in batch_data]).astype('float32') + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + confidences = output_handle.copy_to_cpu() + + output_handle = predictor.get_output_handle(output_names[1]) + boxes = output_handle.copy_to_cpu() # postprocess one by one for i in range(len(batch_data)): - out = postprocess( - confidences=confidences[i], - boxes=boxes[i], - orig_im=batch_data[i]['orig_im'], - orig_im_shape=batch_data[i]['orig_im_shape'], - orig_im_path=batch_data[i]['orig_im_path'], - output_dir=output_dir, - visualization=visualization, - confs_threshold=confs_threshold, - iou_threshold=iou_threshold) + out = postprocess(confidences=confidences[i], + boxes=boxes[i], + orig_im=batch_data[i]['orig_im'], + orig_im_shape=batch_data[i]['orig_im_shape'], + orig_im_path=batch_data[i]['orig_im_path'], + output_dir=output_dir, + visualization=visualization, + confs_threshold=confs_threshold, + iou_threshold=iou_threshold) res.append(out) return res @@ -163,38 +174,39 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.face_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) + results = self.face_detection(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options. 
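The hunks above replace the legacy `AnalysisConfig` / `PaddleTensor` pipeline with the `paddle.inference` predictor API. The snippet below is a minimal sketch of that flow in isolation, assuming an exported inference model directory at a placeholder path; the input shape is illustrative only and not tied to this module.

```python
import numpy as np
import paddle.inference as paddle_infer

# Placeholder path: point this at an exported inference model directory.
config = paddle_infer.Config("/PATH/TO/INFERENCE/MODEL")
config.disable_glog_info()
config.disable_gpu()
predictor = paddle_infer.create_predictor(config)

# Copy a dummy NCHW float32 batch into the first input handle.
batch_image = np.random.rand(1, 3, 480, 640).astype("float32")
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(batch_image)

predictor.run()

# Fetch every declared output back to host memory as numpy arrays.
outputs = [
    predictor.get_output_handle(name).copy_to_cpu()
    for name in predictor.get_output_names()
]
```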
""" - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='face_detector_640_predict_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='face_detector_640_predict_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") def add_module_input_arg(self): diff --git a/modules/image/keypoint_detection/face_landmark_localization/README.md b/modules/image/keypoint_detection/face_landmark_localization/README.md index ae433239a..c24205000 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/README.md +++ b/modules/image/keypoint_detection/face_landmark_localization/README.md @@ -48,7 +48,7 @@ - ```shell $ hub run face_landmark_localization --input_path "/PATH/TO/IMAGE" ``` - + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -56,17 +56,17 @@ - ```python import paddlehub as hub import cv2 - + face_landmark = hub.Module(name="face_landmark_localization") - + # Replace face detection module to speed up predictions but reduce performance # face_landmark.set_face_detector_module(hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320")) - + result = face_landmark.keypoint_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) # or # result = face_landmark.keypoint_detection(paths=['/PATH/TO/IMAGE']) ``` - + - ### 3、API - ```python @@ -125,7 +125,7 @@ params_filename=None, combined=False): ``` - + - 将模型保存到指定路径,由于人脸关键点检测模型由人脸检测+关键点检测两个模型组成,因此保存后会存在两个子目录,其中`face_landmark`为人脸关键点模型,`detector`为人脸检测模型。 - **参数** - dirname: 存在模型的目录名称 @@ -158,17 +158,17 @@ import cv2 import base64 import paddlehub as hub - + def cv2_to_base64(image): data = cv2.imencode('.jpg', image)[1] return base64.b64encode(data.tostring()).decode('utf8') - + # 发送HTTP请求 data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} headers = {"Content-type": "application/json"} url = "http://127.0.0.1:8866/predict/face_landmark_localization" r = requests.post(url=url, headers=headers, data=json.dumps(data)) - + # 打印预测结果 print(r.json()["results"]) ``` @@ -178,12 +178,15 @@ * 1.0.0 初始发布 - + * 1.0.1 * 1.0.2 +* 1.0.3 + + 移除 fluid api + * ```shell - $ hub install face_landmark_localization==1.0.2 + $ hub install face_landmark_localization==1.0.3 ``` - diff --git a/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py b/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py deleted file mode 100644 index b2be891d6..000000000 --- a/modules/image/keypoint_detection/face_landmark_localization/face_landmark.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import - -import paddle.fluid as fluid - -__all__ = ["face_landmark_localization"] - - -def face_landmark_localization(image): - # image = fluid.layers.data(shape=[1, 60, 60], name='data', dtype='float32') - Conv1 = 
fluid.layers.conv2d( - image, - param_attr='Conv1_weights', - name='Conv1', - dilation=[1, 1], - filter_size=[5, 5], - stride=[1, 1], - groups=1, - bias_attr='Conv1_bias', - padding=[2, 2], - num_filters=20) - ActivationTangH1 = fluid.layers.tanh(Conv1, name='ActivationTangH1') - ActivationAbs1 = fluid.layers.abs(ActivationTangH1, name='ActivationAbs1') - Pool1 = fluid.layers.pool2d( - ActivationAbs1, - exclusive=False, - pool_type='max', - pool_padding=[0, 0], - name='Pool1', - global_pooling=False, - pool_stride=[2, 2], - ceil_mode=True, - pool_size=[2, 2]) - Conv2 = fluid.layers.conv2d( - Pool1, - param_attr='Conv2_weights', - name='Conv2', - dilation=[1, 1], - filter_size=[5, 5], - stride=[1, 1], - groups=1, - bias_attr='Conv2_bias', - padding=[2, 2], - num_filters=48) - ActivationTangH2 = fluid.layers.tanh(Conv2, name='ActivationTangH2') - ActivationAbs2 = fluid.layers.abs(ActivationTangH2, name='ActivationAbs2') - Pool2 = fluid.layers.pool2d( - ActivationAbs2, - exclusive=False, - pool_type='max', - pool_padding=[0, 0], - name='Pool2', - global_pooling=False, - pool_stride=[2, 2], - ceil_mode=True, - pool_size=[2, 2]) - Conv3 = fluid.layers.conv2d( - Pool2, - param_attr='Conv3_weights', - name='Conv3', - dilation=[1, 1], - filter_size=[3, 3], - stride=[1, 1], - groups=1, - bias_attr='Conv3_bias', - padding=[0, 0], - num_filters=64) - ActivationTangH3 = fluid.layers.tanh(Conv3, name='ActivationTangH3') - ActivationAbs3 = fluid.layers.abs(ActivationTangH3, name='ActivationAbs3') - Pool3 = fluid.layers.pool2d( - ActivationAbs3, - exclusive=False, - pool_type='max', - pool_padding=[0, 0], - name='Pool3', - global_pooling=False, - pool_stride=[2, 2], - ceil_mode=True, - pool_size=[3, 3]) - Conv4 = fluid.layers.conv2d( - Pool3, - param_attr='Conv4_weights', - name='Conv4', - dilation=[1, 1], - filter_size=[3, 3], - stride=[1, 1], - groups=1, - bias_attr='Conv4_bias', - padding=[0, 0], - num_filters=80) - ActivationTangH4 = fluid.layers.tanh(Conv4, name='ActivationTangH4') - ActivationAbs4 = fluid.layers.abs(ActivationTangH4, name='ActivationAbs4') - Dense1 = fluid.layers.fc( - ActivationAbs4, param_attr='Dense1_weights', act=None, name='Dense1', size=512, bias_attr='Dense1_bias') - ActivationTangH5 = fluid.layers.tanh(Dense1, name='ActivationTangH5') - ActivationAbs5 = fluid.layers.abs(ActivationTangH5, name='ActivationAbs5') - Dense3 = fluid.layers.fc( - ActivationAbs5, param_attr='Dense3_weights', act=None, name='Dense3', size=136, bias_attr='Dense3_bias') - return Dense3 diff --git a/modules/image/keypoint_detection/face_landmark_localization/module.py b/modules/image/keypoint_detection/face_landmark_localization/module.py index c389fec43..31d635235 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/module.py +++ b/modules/image/keypoint_detection/face_landmark_localization/module.py @@ -2,21 +2,25 @@ from __future__ import absolute_import from __future__ import division -import ast import argparse -import time +import ast import os +import time from collections import OrderedDict import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from face_landmark_localization.processor import postprocess, base64_to_cv2 +import paddle from face_landmark_localization.data_feed import reader +from face_landmark_localization.processor import base64_to_cv2 +from face_landmark_localization.processor 
import postprocess +from paddle.inference import Config +from paddle.inference import create_predictor + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( @@ -26,8 +30,9 @@ author_email="paddle-dev@baidu.com", summary= "Face_Landmark_Localization can be used to locate face landmark. This Module is trained through the MPII Human Pose dataset.", - version="1.0.2") + version="1.0.3") class FaceLandmarkLocalization(hub.Module): + def _initialize(self, face_detector_module=None): """ Args: @@ -44,10 +49,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -56,10 +61,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def set_face_detector_module(self, face_detector_module): """ @@ -77,24 +82,25 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) face_landmark_dir = os.path.join(dirname, "face_landmark") detector_dir = os.path.join(dirname, "detector") - fluid.io.save_inference_model( - dirname=face_landmark_dir, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - self.face_detector.save_inference_model( - dirname=detector_dir, model_filename=model_filename, params_filename=params_filename, combined=combined) + paddle.static.save_inference_model(dirname=face_landmark_dir, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) + self.face_detector.save_inference_model(dirname=detector_dir, + model_filename=model_filename, + params_filename=params_filename, + combined=combined) def keypoint_detection(self, images=None, @@ -143,11 +149,18 @@ def keypoint_detection(self, batch_data.append(all_data[handle_id + image_id]) except: pass - # feed batch image - batch_image = np.array([data['face'] for data in batch_data]) - face_tensor = PaddleTensor(batch_image.astype('float32')) - pred_out = self.gpu_predictor.run([face_tensor]) if use_gpu else self.cpu_predictor.run([face_tensor]) - points = pred_out[0].as_ndarray() + # feed batch image + batch_image = np.array([data['face'] for data in batch_data]).astype('float32') + predictor = 
self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + points = output_handle.copy_to_cpu() + for idx, sample in enumerate(batch_data): sample['points'] = points[idx].reshape(68, -1) res += batch_data @@ -169,11 +182,10 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -181,20 +193,28 @@ def run_cmd(self, argvs): self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.keypoint_detection( - paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + results = self.keypoint_detection(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default=None, help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default=None, + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md index 7c3749cec..4b3ac1822 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md @@ -168,6 +168,10 @@ 修复numpy数据读取问题 +* 1.1.3 + + 移除 fluid api + - ```shell - $ hub install ssd_mobilenet_v1_pascal==1.1.2 + $ hub install ssd_mobilenet_v1_pascal==1.1.3 ``` diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md index 8956a7ed4..4bad42420 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md @@ -167,6 +167,10 @@ Fix the problem of reading numpy +* 1.1.3 + + Remove fluid api + - ```shell - $ hub install ssd_mobilenet_v1_pascal==1.1.2 + $ hub install ssd_mobilenet_v1_pascal==1.1.3 ``` diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py 
b/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py deleted file mode 100644 index 742391cc2..000000000 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/mobilenet_v1.py +++ /dev/null @@ -1,211 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MobileNet'] - - -class MobileNet(object): - """ - MobileNet v1, see https://arxiv.org/abs/1704.04861 - - Args: - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - conv_group_scale (int): scaling factor for convolution groups - with_extra_blocks (bool): if extra blocks should be added - extra_block_filters (list): number of filter for each extra block - class_dim (int): number of class while classification - yolo_v3 (bool): whether to output layers which yolo_v3 needs - """ - __shared__ = ['norm_type', 'weight_prefix_name'] - - def __init__(self, - norm_type='bn', - norm_decay=0., - conv_group_scale=1, - conv_learning_rate=1.0, - with_extra_blocks=False, - extra_block_filters=[[256, 512], [128, 256], [128, 256], - [64, 128]], - weight_prefix_name='', - class_dim=1000, - yolo_v3=False): - self.norm_type = norm_type - self.norm_decay = norm_decay - self.conv_group_scale = conv_group_scale - self.conv_learning_rate = conv_learning_rate - self.with_extra_blocks = with_extra_blocks - self.extra_block_filters = extra_block_filters - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.yolo_v3 = yolo_v3 - - def _conv_norm(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): - parameter_attr = ParamAttr( - learning_rate=self.conv_learning_rate, - initializer=fluid.initializer.MSRA(), - name=name + "_weights") - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=parameter_attr, - bias_attr=False) - - bn_name = name + "_bn" - norm_decay = self.norm_decay - bn_param_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_offset') - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def depthwise_separable(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): - depthwise_conv = self._conv_norm( - input=input, - filter_size=3, - num_filters=int(num_filters1 * scale), - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False, - name=name + "_dw") - - pointwise_conv = self._conv_norm( - input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep") - return pointwise_conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - name=None): - pointwise_conv = self._conv_norm( - input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - num_groups=int(num_groups), - padding=0, - name=name + "_extra1") - normal_conv = self._conv_norm( - input=pointwise_conv, - 
filter_size=3, - num_filters=int(num_filters2), - stride=2, - num_groups=int(num_groups), - padding=1, - name=name + "_extra2") - return normal_conv - - def __call__(self, input): - scale = self.conv_group_scale - blocks = [] - # input 1/1 - out = self._conv_norm( - input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1") - # 1/2 - out = self.depthwise_separable( - out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") - out = self.depthwise_separable( - out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2") - # 1/4 - out = self.depthwise_separable( - out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1") - out = self.depthwise_separable( - out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2") - # 1/8 - blocks.append(out) - out = self.depthwise_separable( - out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1") - out = self.depthwise_separable( - out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2") - # 1/16 - blocks.append(out) - for i in range(5): - out = self.depthwise_separable( - out, - 512, - 512, - 512, - 1, - scale, - name=self.prefix_name + "conv5_" + str(i + 1)) - module11 = out - - out = self.depthwise_separable( - out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6") - # 1/32 - out = self.depthwise_separable( - out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6") - module13 = out - blocks.append(out) - if self.yolo_v3: - return blocks - if not self.with_extra_blocks: - out = fluid.layers.pool2d( - input=out, pool_type='avg', global_pooling=True) - out = fluid.layers.fc( - input=out, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.MSRA(), name="fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - out = fluid.layers.softmax(out) - blocks.append(out) - return blocks - - num_filters = self.extra_block_filters - module14 = self._extra_block(module13, num_filters[0][0], - num_filters[0][1], 1, 2, - self.prefix_name + "conv7_1") - module15 = self._extra_block(module14, num_filters[1][0], - num_filters[1][1], 1, 2, - self.prefix_name + "conv7_2") - module16 = self._extra_block(module15, num_filters[2][0], - num_filters[2][1], 1, 2, - self.prefix_name + "conv7_3") - module17 = self._extra_block(module16, num_filters[3][0], - num_filters[3][1], 1, 2, - self.prefix_name + "conv7_4") - return module11, module13, module14, module15, module16, module17 diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py index c732da92f..7460115c3 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py @@ -1,47 +1,49 @@ # coding=utf-8 from __future__ import absolute_import -import ast import argparse +import ast import os from functools import partial -import yaml import numpy as np -import paddle.fluid as fluid +import paddle +import yaml +from paddle.inference import Config +from paddle.inference import create_predictor +from ssd_mobilenet_v1_pascal.data_feed import reader +from ssd_mobilenet_v1_pascal.processor import base64_to_cv2 +from ssd_mobilenet_v1_pascal.processor import load_label_info +from ssd_mobilenet_v1_pascal.processor import postprocess + import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix - -from 
ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet -from ssd_mobilenet_v1_pascal.processor import load_label_info, postprocess, base64_to_cv2 -from ssd_mobilenet_v1_pascal.data_feed import reader +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="ssd_mobilenet_v1_pascal", - version="1.1.2", - type="cv/object_detection", - summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") +@moduleinfo(name="ssd_mobilenet_v1_pascal", + version="1.1.3", + type="cv/object_detection", + summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") class SSDMobileNetv1(hub.Module): + def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "ssd_mobilenet_v1_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "ssd_mobilenet_v1_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self.model_config = None self._set_config() def _set_config(self): # predictor config setting. - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -50,10 +52,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) # model config setting. if not self.model_config: @@ -64,99 +66,6 @@ def _set_config(self): self.output_decoder_config = self.model_config['SSDOutputDecoder'] self.mobilenet_config = self.model_config['MobileNet'] - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 300, 300], dtype='float32') - # backbone - backbone = MobileNet(**self.mobilenet_config) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # names of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # names of outputs - if get_prediction: - locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, - image=image, - num_classes=21, - **self.multi_box_head_config) - pred = fluid.layers.detection_output( - loc=locs, - scores=confs, - prior_box=box, - prior_box_var=box_var, - **self.output_decoder_config) - outputs = {'bbox_out': [var_prefix + pred.name]} - else: - outputs = { - 'body_features': - [var_prefix + var.name for var in body_feats] - } - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - outputs = { - out_key: [ - context_prog.global_block().vars[varname] - for varname in out_value - ] - for out_key, out_value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - def object_detection(self, paths=None, images=None, @@ -202,50 +111,48 @@ def object_detection(self, paths += data['image'] data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def 
save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -261,62 +168,48 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) + results = self.object_detection(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='detection_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ Add the command input options. 
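The updated `object_detection` above builds its mini-batches with `paddle.batch` instead of `fluid.io.batch`. A self-contained sketch of that wrapper, using a toy generator in place of the module's image reader:

```python
import paddle


def toy_reader():
    # Stand-in for the module's `reader(paths, images)` generator.
    for i in range(5):
        yield [i]


batch_reader = paddle.batch(toy_reader, batch_size=2)
for batch in batch_reader():
    print(batch)  # [[0], [1]], then [[2], [3]], then [[4]]
```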
""" - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.5, - help="threshold for object detecion.") + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument('--score_thresh', + type=ast.literal_eval, + default=0.5, + help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py index 82b2335f6..0cbe01ded 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py @@ -4,7 +4,8 @@ import cv2 import numpy as np -from PIL import Image, ImageDraw +from PIL import Image +from PIL import ImageDraw __all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] @@ -15,6 +16,7 @@ def base64_to_cv2(b64str): data = cv2.imdecode(data, cv2.IMREAD_COLOR) return data + def check_dir(dir_path): if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -22,6 +24,7 @@ def check_dir(dir_path): os.remove(dir_path) os.makedirs(dir_path) + def get_save_image_name(img, output_dir, image_path): """ Get save image name from source image path. @@ -50,23 +53,17 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + draw.rectangle(xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), + fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -95,16 +92,9 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. @@ -127,9 +117,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. 
""" - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) @@ -159,9 +148,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -181,13 +168,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md index 0abc368b6..1cdc13767 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md @@ -167,6 +167,10 @@ 修复numpy数据读取问题 +* 1.0.3 + + 移除 fluid api + - ```shell - $ hub install yolov3_darknet53_pedestrian==1.0.2 + $ hub install yolov3_darknet53_pedestrian==1.0.3 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md index f7be6546a..09d82d391 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md @@ -166,6 +166,10 @@ Fix the problem of reading numpy +* 1.0.3 + + Remove fluid api + - ```shell - $ hub install yolov3_darknet53_pedestrian==1.0.2 + $ hub install yolov3_darknet53_pedestrian==1.0.3 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py deleted file mode 100644 index 1e6e6f8f6..000000000 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/darknet.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - 
self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """Get the backbone of DarkNet, that is output for the 5 stages. - - :param input: Variable of input image - :type input: Variable - :Returns: The last variables of each stage. 
- """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py index 590b9efac..5b8a4c842 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py @@ -1,49 +1,49 @@ # coding=utf-8 from __future__ import absolute_import -import ast import argparse +import ast import os from functools import partial import numpy as np -import paddle.fluid as fluid +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor +from yolov3_darknet53_pedestrian.data_feed import reader +from yolov3_darknet53_pedestrian.processor import base64_to_cv2 +from yolov3_darknet53_pedestrian.processor import load_label_info +from yolov3_darknet53_pedestrian.processor import postprocess + import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_pedestrian.darknet import DarkNet -from yolov3_darknet53_pedestrian.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_pedestrian.data_feed import reader -from yolov3_darknet53_pedestrian.yolo_head import MultiClassNMS, YOLOv3Head +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="yolov3_darknet53_pedestrian", - version="1.0.2", - type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") +@moduleinfo(name="yolov3_darknet53_pedestrian", + version="1.0.3", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") class YOLOv3DarkNet53Pedestrian(hub.Module): + def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_pedestrian_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + 
self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_pedestrian_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -52,116 +52,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head( - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - norm_decay=0., - num_classes=1, - ignore_thresh=0.7, - label_smooth=True, - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=False, - score_threshold=0.01)) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): 
- param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, @@ -204,53 +98,50 @@ def object_detection(self, paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -266,62 +157,48 @@ def run_cmd(self, argvs): """ Run as a command. 
""" - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh) + results = self.object_detection(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='yolov3_pedestrian_detect_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='yolov3_pedestrian_detect_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ Add the command input options. 
""" - self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.2, - help="threshold for object detecion.") + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_input_group.add_argument('--score_thresh', + type=ast.literal_eval, + default=0.2, + help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py index 1039e3e48..356ce0342 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py @@ -4,7 +4,8 @@ import cv2 import numpy as np -from PIL import Image, ImageDraw +from PIL import Image +from PIL import ImageDraw __all__ = ['base64_to_cv2', 'load_label_info', 'postprocess'] @@ -50,21 +51,15 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + draw.rectangle(xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), + fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -92,14 +87,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -126,9 +114,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. 
""" - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) @@ -146,7 +133,6 @@ def postprocess(paths, else: unhandled_paths_num = 0 - output = list() for index in range(len(lod) - 1): output_i = {'data': []} @@ -158,9 +144,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -176,13 +160,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py deleted file mode 100644 index 7428fb4c2..000000000 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = 
weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md index 72fee4b31..fdf4569de 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md @@ -166,6 +166,10 @@ 修复numpy数据读取问题 +* 1.0.3 + + 移除 fluid api + - ```shell - $ hub install yolov3_darknet53_vehicles==1.0.2 + $ hub install yolov3_darknet53_vehicles==1.0.3 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md index b0a5aa992..59cb62134 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md @@ -166,6 +166,10 @@ Fix the problem of reading numpy +* 1.0.3 + + Remove fluid api + - ```shell - $ hub install yolov3_darknet53_vehicles==1.0.2 + $ hub install yolov3_darknet53_vehicles==1.0.3 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py b/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py deleted file mode 100644 index 1e6e6f8f6..000000000 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/darknet.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - name=None): - conv = fluid.layers.conv2d( - 
input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(float(self.norm_decay)), - name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm above. - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, - input, - ch_out, - filter_size=3, - stride=2, - padding=1, - name=None): - return self._conv_norm( - input, - ch_out=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm( - input, - ch_out=ch_out, - filter_size=1, - stride=1, - padding=0, - name=name + ".0") - conv2 = self._conv_norm( - conv1, - ch_out=ch_out * 2, - filter_size=3, - stride=1, - padding=1, - name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """Get the backbone of DarkNet, that is output for the 5 stages. - - :param input: Variable of input image - :type input: Variable - :Returns: The last variables of each stage. 
- """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, - ch_out=32, - filter_size=3, - stride=1, - padding=1, - name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, - ch_out=conv.shape[1] * 2, - name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, - ch_out=block.shape[1] * 2, - name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d( - input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), - name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py index 090223cc4..b4586de5a 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py @@ -1,33 +1,35 @@ # coding=utf-8 from __future__ import absolute_import -import ast import argparse +import ast import os from functools import partial import numpy as np -import paddle.fluid as fluid -import paddlehub as hub +import paddle from paddle.inference import Config from paddle.inference import create_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix - -from yolov3_darknet53_vehicles.darknet import DarkNet -from yolov3_darknet53_vehicles.processor import load_label_info, postprocess, base64_to_cv2 from yolov3_darknet53_vehicles.data_feed import reader -from yolov3_darknet53_vehicles.yolo_head import MultiClassNMS, YOLOv3Head +from yolov3_darknet53_vehicles.processor import base64_to_cv2 +from yolov3_darknet53_vehicles.processor import load_label_info +from yolov3_darknet53_vehicles.processor import postprocess + +import paddlehub as hub +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="yolov3_darknet53_vehicles", - version="1.0.2", - type="CV/object_detection", - summary="Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") +@moduleinfo(name="yolov3_darknet53_vehicles", + version="1.0.3", + type="CV/object_detection", + summary="Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") class YOLOv3DarkNet53Vehicles(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_vehicles_model") self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) @@ -81,93 +83,6 @@ def _set_config(self): xpu_config.enable_xpu(100) self.xpu_predictor = create_predictor(xpu_config) - def context(self, 
trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head( - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], - [253, 224]], - norm_decay=0., - num_classes=6, - ignore_thresh=0.7, - label_smooth=False, - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=400, - normalized=False, - score_threshold=0.005)) - # head_features - head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': [var_prefix + var.name for var in head_features], - 'body_features': [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} - # outputs - outputs = { - key: [context_prog.global_block().vars[varname] for varname in value] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - def object_detection(self, paths=None, images=None, @@ -222,7 +137,7 @@ def object_detection(self, paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) @@ -243,15 +158,14 @@ def object_detection(self, output_names = predictor.get_output_names() output_handle = predictor.get_output_handle(output_names[0]) - output = postprocess( - paths=paths, - images=images, - data_out=output_handle, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - 
handle_id=iter_id * batch_size, - visualization=visualization) + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res @@ -259,20 +173,19 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -288,44 +201,44 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.object_detection( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization, - score_thresh=args.score_thresh, - use_device=args.use_device) + results = self.object_detection(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization, + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='yolov3_vehicles_detect_output', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") - self.arg_config_group.add_argument( - '--use_device', - choices=["cpu", "gpu", "xpu", "npu"], - help="use cpu, gpu, xpu or npu. 
overwrites use_gpu flag.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether to use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='yolov3_vehicles_detect_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") + self.arg_config_group.add_argument('--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ @@ -333,5 +246,7 @@ def add_module_input_arg(self): """ self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', type=ast.literal_eval, default=0.2, help="threshold for object detecion.") + self.arg_input_group.add_argument('--score_thresh', + type=ast.literal_eval, + default=0.2, + help="threshold for object detection.") diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py deleted file mode 100644 index 7428fb4c2..000000000 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, -
stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md index f84c3578a..4b939565d 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md @@ -1,7 +1,7 @@ -# deeplabv3p_xception65_humanseg +# deeplabv3p_xception65_humanseg |模型名称|deeplabv3p_xception65_humanseg| -| :--- | :---: | +| :--- | :---: | |类别|图像-图像分割| |网络|deeplabv3p| |数据集|百度自建数据集| @@ -18,7 +18,7 @@

- + - ### 模型介绍 - DeepLabv3+使用百度自建数据集进行训练,可用于人像分割,支持任意大小的图片输入。 @@ -41,7 +41,7 @@ - ```shell $ hub install deeplabv3p_xception65_humanseg ``` - + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) @@ -176,4 +176,8 @@ * 1.1.2 - 修复cudnn为8.0.4显存泄露问题 + 移除 fluid api + + - ```shell + $ hub install deeplabv3p_xception65_humanseg==1.1.2 + ``` diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md index 1afa20b09..eb6204c76 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md @@ -1,7 +1,7 @@ -# deeplabv3p_xception65_humanseg +# deeplabv3p_xception65_humanseg |Module Name |deeplabv3p_xception65_humanseg| -| :--- | :---: | +| :--- | :---: | |Category|Image segmentation| |Network|deeplabv3p| |Dataset|Baidu self-built dataset| @@ -10,7 +10,7 @@ |Data indicators |-| |Latest update date|2021-02-26| -## I. Basic Information +## I. Basic Information - ### Application Effect Display @@ -18,7 +18,7 @@

- + - ### Module Introduction - DeepLabv3+ model is trained by Baidu self-built dataset, which can be used for portrait segmentation. @@ -89,9 +89,9 @@ - **Return** - * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + * res (list\[dict\]): The list of recognition results, where each element is dict and each field is: * save\_path (str, optional): Save path of the result. - * data (numpy.ndarray): The result of portrait segmentation. + * data (numpy.ndarray): The result of portrait segmentation. - ```python def save_inference_model(dirname, @@ -172,4 +172,8 @@ * 1.1.2 - Fix memory leakage problem of on cudnn 8.0.4 + Remove fluid api + + - ```shell + $ hub install deeplabv3p_xception65_humanseg==1.1.2 + ``` diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py index 9aac87575..5208b61ba 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py @@ -2,28 +2,33 @@ from __future__ import absolute_import from __future__ import division +import argparse import ast import os -import argparse import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving - -from deeplabv3p_xception65_humanseg.processor import postprocess, base64_to_cv2, cv2_to_base64 +import paddle from deeplabv3p_xception65_humanseg.data_feed import reader +from deeplabv3p_xception65_humanseg.processor import base64_to_cv2 +from deeplabv3p_xception65_humanseg.processor import cv2_to_base64 +from deeplabv3p_xception65_humanseg.processor import postprocess +from paddle.inference import Config +from paddle.inference import create_predictor +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -@moduleinfo( - name="deeplabv3p_xception65_humanseg", - type="CV/semantic_segmentation", - author="baidu-vis", - author_email="", - summary="DeepLabv3+ is a semantic segmentation model.", - version="1.1.1") + +@moduleinfo(name="deeplabv3p_xception65_humanseg", + type="CV/semantic_segmentation", + author="baidu-vis", + author_email="", + summary="DeepLabv3+ is a semantic segmentation model.", + version="1.1.2") class DeeplabV3pXception65HumanSeg(hub.Module): + def _initialize(self): self.default_pretrained_model_path = os.path.join(self.directory, "deeplabv3p_xception65_humanseg_model") self._set_config() @@ -32,10 +37,10 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -44,10 +49,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = 
create_predictor(gpu_config) def segmentation(self, images=None, @@ -107,18 +112,25 @@ def segmentation(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output_data = output_handle.copy_to_cpu() + output = np.expand_dims(output_data, axis=1) # postprocess one by one for i in range(len(batch_data)): - out = postprocess( - data_out=output[i], - org_im=batch_data[i]['org_im'], - org_im_shape=batch_data[i]['org_im_shape'], - org_im_path=batch_data[i]['org_im_path'], - output_dir=output_dir, - visualization=visualization) + out = postprocess(data_out=output[i], + org_im=batch_data[i]['org_im'], + org_im_shape=batch_data[i]['org_im_shape'], + org_im_path=batch_data[i]['org_im_path'], + output_dir=output_dir, + visualization=visualization) res.append(out) return res @@ -126,20 +138,19 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -156,11 +167,10 @@ def run_cmd(self, argvs): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -168,24 +178,29 @@ def run_cmd(self, argvs): self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.segmentation( - paths=[args.input_path], - batch_size=args.batch_size, - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) + results = self.segmentation(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options. """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='humanseg_output', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='humanseg_output', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") def add_module_input_arg(self): diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md index e28ba6cb4..1433a45d3 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README.md @@ -1,7 +1,7 @@ # chinese_ocr_db_crnn_mobile |模型名称|chinese_ocr_db_crnn_mobile| -| :--- | :---: | +| :--- | :---: | |类别|图像-文字识别| |网络|Differentiable Binarization+RCNN| |数据集|icdar2015数据集| @@ -35,9 +35,9 @@ ## 二、安装 -- ### 1、环境依赖 +- ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 1.7.2 - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) @@ -48,7 +48,7 @@ - ```shell $ pip install shapely pyclipper ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** - ### 2、安装 @@ -87,7 +87,7 @@ - ```python __init__(text_detector_module=None, enable_mkldnn=False) ``` - + - 构造ChineseOCRDBCRNN对象 - **参数** @@ -191,11 +191,15 @@ * 1.1.1 支持文本中空格识别。 - + * 1.1.2 修复只能检出30字段问题。 +* 1.1.3 + + 移除 fluid api + - ```shell - $ hub install chinese_ocr_db_crnn_mobile==1.1.2 + $ hub install chinese_ocr_db_crnn_mobile==1.1.3 ``` diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md index 2ed262867..1b28ee118 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md @@ -197,6 +197,10 @@ Fixed an issue where only 30 fields can be detected. 
+* 1.1.3 + + Remove fluid api + - ```shell - $ hub install chinese_ocr_db_crnn_mobile==1.1.2 + $ hub install chinese_ocr_db_crnn_mobile==1.1.3 ``` diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py index 371e8f973..d7afd7019 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py @@ -6,28 +6,35 @@ import os import time -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - +import paddle from chinese_ocr_db_crnn_mobile.character import CharacterOps -from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes +from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2 +from chinese_ocr_db_crnn_mobile.utils import draw_ocr +from chinese_ocr_db_crnn_mobile.utils import get_image_ext +from chinese_ocr_db_crnn_mobile.utils import sorted_boxes +from paddle.inference import Config +from paddle.inference import create_predictor +from PIL import Image + +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( name="chinese_ocr_db_crnn_mobile", - version="1.1.2", + version="1.1.3", summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \ based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. 
", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseOCRDBCRNN(hub.Module): + def _initialize(self, text_detector_module=None, enable_mkldnn=False): """ initialize with the necessary elements @@ -60,7 +67,7 @@ def _set_config(self, pretrained_model_path): model_file_path = os.path.join(pretrained_model_path, 'model') params_file_path = os.path.join(pretrained_model_path, 'params') - config = AnalysisConfig(model_file_path, params_file_path) + config = Config(model_file_path, params_file_path) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -81,14 +88,14 @@ def _set_config(self, pretrained_model_path): config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - predictor = create_paddle_predictor(config) + predictor = create_predictor(config) input_names = predictor.get_input_names() - input_tensor = predictor.get_input_tensor(input_names[0]) + input_tensor = predictor.get_input_handle(input_names[0]) output_names = predictor.get_output_names() output_tensors = [] for output_name in output_names: - output_tensor = predictor.get_output_tensor(output_name) + output_tensor = predictor.get_output_handle(output_name) output_tensors.append(output_tensor) return predictor, input_tensor, output_tensors @@ -99,8 +106,9 @@ def text_detector_module(self): text detect module """ if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_mobile', enable_mkldnn=self.enable_mkldnn, version='1.0.4') + self._text_detector_module = hub.Module(name='chinese_text_detection_db_mobile', + enable_mkldnn=self.enable_mkldnn, + version='1.0.4') return self._text_detector_module def read_images(self, paths=[]): @@ -129,8 +137,10 @@ def get_rotate_crop_image(self, img, points): img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))) pts_std = np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC) + dst_img = cv2.warpPerspective(img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: dst_img = np.rot90(dst_img) @@ -223,8 +233,9 @@ def recognize_text(self, assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
- detection_results = self.text_detector_module.detect_text( - images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + detection_results = self.text_detector_module.detect_text(images=predicted_data, + use_gpu=self.use_gpu, + box_thresh=box_thresh) boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] all_results = [] @@ -240,8 +251,8 @@ def recognize_text(self, tmp_box = copy.deepcopy(boxes[num_box]) img_crop = self.get_rotate_crop_image(original_image, tmp_box) img_crop_list.append(img_crop) - img_crop_list, angle_list = self._classify_text( - img_crop_list, angle_classification_thresh=angle_classification_thresh) + img_crop_list, angle_list = self._classify_text(img_crop_list, + angle_classification_thresh=angle_classification_thresh) rec_results = self._recognize_text(img_crop_list) # if the recognized text confidence score is lower than text_thresh, then drop it @@ -273,18 +284,23 @@ def serving_method(self, images, **kwargs): return results def save_result_image( - self, - original_image, - detection_boxes, - rec_results, - output_dir='ocr_result', - text_thresh=0.5, + self, + original_image, + detection_boxes, + rec_results, + output_dir='ocr_result', + text_thresh=0.5, ): image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) txts = [item[0] for item in rec_results] scores = [item[1] for item in rec_results] - draw_img = draw_ocr( - image, detection_boxes, txts, scores, font_file=self.font_file, draw_txt=True, drop_score=text_thresh) + draw_img = draw_ocr(image, + detection_boxes, + txts, + scores, + font_file=self.font_file, + draw_txt=True, + drop_score=text_thresh) if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -322,7 +338,7 @@ def _classify_text(self, image_list, angle_classification_thresh=0.9): norm_img_batch = norm_img_batch.copy() self.cls_input_tensor.copy_from_cpu(norm_img_batch) - self.cls_predictor.zero_copy_run() + self.cls_predictor.run() prob_out = self.cls_output_tensors[0].copy_to_cpu() label_out = self.cls_output_tensors[1].copy_to_cpu() @@ -366,7 +382,7 @@ def _recognize_text(self, img_list): norm_img_batch = norm_img_batch.copy() self.rec_input_tensor.copy_from_cpu(norm_img_batch) - self.rec_predictor.zero_copy_run() + self.rec_predictor.run() rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() rec_idx_lod = self.rec_output_tensors[0].lod()[0] @@ -407,60 +423,57 @@ def _save_recognizer_model(self, dirname, model_filename=None, params_filename=N if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') params_file_path = os.path.join(self.rec_pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.rec_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + 
target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') params_file_path = os.path.join(self.cls_pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.cls_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @runnable def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -470,20 +483,28 @@ def run_cmd(self, argvs): self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + results = self.recognize_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='ocr_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='ocr_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md b/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md index b6c2e8c90..55c8c94fb 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/README.md @@ -1,7 +1,7 @@ # chinese_ocr_db_crnn_server |模型名称|chinese_ocr_db_crnn_server| -| :--- | :---: | +| :--- | :---: | |类别|图像-文字识别| |网络|Differentiable Binarization+RCNN| |数据集|icdar2015数据集| @@ -32,9 +32,9 @@ ## 二、安装 -- ### 1、环境依赖 +- ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 1.7.2 - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) @@ -45,7 +45,7 @@ - ```shell $ pip install shapely pyclipper ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** - ### 2、安装 @@ -183,11 +183,15 @@ * 1.1.1 支持文本中空格识别。 - + * 1.1.2 修复检出字段无法超过30个问题。 - + +* 1.1.3 + + 移除 fluid api + - ```shell - $ hub install chinese_ocr_db_crnn_server==1.1.2 + $ hub install chinese_ocr_db_crnn_server==1.1.3 ``` diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py index a96673f3d..b60331b19 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py @@ -10,28 +10,35 @@ import os import time -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub - +import paddle from chinese_ocr_db_crnn_server.character import CharacterOps -from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes +from chinese_ocr_db_crnn_server.utils import base64_to_cv2 +from chinese_ocr_db_crnn_server.utils import draw_ocr +from 
chinese_ocr_db_crnn_server.utils import get_image_ext +from chinese_ocr_db_crnn_server.utils import sorted_boxes +from paddle.inference import Config +from paddle.inference import create_predictor +from PIL import Image + +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving @moduleinfo( name="chinese_ocr_db_crnn_server", - version="1.1.2", + version="1.1.3", summary= "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseOCRDBCRNNServer(hub.Module): + def _initialize(self, text_detector_module=None, enable_mkldnn=False): """ initialize with the necessary elements @@ -64,7 +71,7 @@ def _set_config(self, pretrained_model_path): model_file_path = os.path.join(pretrained_model_path, 'model') params_file_path = os.path.join(pretrained_model_path, 'params') - config = AnalysisConfig(model_file_path, params_file_path) + config = Config(model_file_path, params_file_path) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -85,14 +92,14 @@ def _set_config(self, pretrained_model_path): config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - predictor = create_paddle_predictor(config) + predictor = create_predictor(config) input_names = predictor.get_input_names() - input_tensor = predictor.get_input_tensor(input_names[0]) + input_tensor = predictor.get_input_handle(input_names[0]) output_names = predictor.get_output_names() output_tensors = [] for output_name in output_names: - output_tensor = predictor.get_output_tensor(output_name) + output_tensor = predictor.get_output_handle(output_name) output_tensors.append(output_tensor) return predictor, input_tensor, output_tensors @@ -103,8 +110,9 @@ def text_detector_module(self): text detect module """ if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_server', enable_mkldnn=self.enable_mkldnn, version='1.0.2') + self._text_detector_module = hub.Module(name='chinese_text_detection_db_server', + enable_mkldnn=self.enable_mkldnn, + version='1.0.2') return self._text_detector_module def read_images(self, paths=[]): @@ -133,8 +141,10 @@ def get_rotate_crop_image(self, img, points): img_crop_height = int(max(np.linalg.norm(points[0] - points[3]), np.linalg.norm(points[1] - points[2]))) pts_std = np.float32([[0, 0], [img_crop_width, 0], [img_crop_width, img_crop_height], [0, img_crop_height]]) M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, M, (img_crop_width, img_crop_height), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC) + dst_img = cv2.warpPerspective(img, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE, + flags=cv2.INTER_CUBIC) dst_img_height, dst_img_width = dst_img.shape[0:2] if dst_img_height * 1.0 / dst_img_width >= 1.5: dst_img = np.rot90(dst_img) @@ -227,8 +237,9 @@ def recognize_text(self, assert predicted_data != [], "There is not any image to be predicted. Please check the input data." 
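
The `text_detector_module` property above still pulls the detection stage in lazily via `hub.Module`, so the public interface of the OCR modules is unchanged by the fluid removal. A hedged end-to-end usage sketch against the upgraded server module (the image path is a placeholder; the keyword names are the ones used by `run_cmd` in these hunks):

```python
import paddlehub as hub

ocr = hub.Module(name="chinese_ocr_db_crnn_server")  # 1.1.3 after this change
results = ocr.recognize_text(paths=["/PATH/TO/IMAGE.jpg"],
                             use_gpu=False,
                             output_dir="ocr_result",
                             visualization=True)
for item in results:
    print(item)
```
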
- detection_results = self.text_detector_module.detect_text( - images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) + detection_results = self.text_detector_module.detect_text(images=predicted_data, + use_gpu=self.use_gpu, + box_thresh=box_thresh) boxes = [np.array(item['data']).astype(np.float32) for item in detection_results] all_results = [] @@ -244,8 +255,8 @@ def recognize_text(self, tmp_box = copy.deepcopy(boxes[num_box]) img_crop = self.get_rotate_crop_image(original_image, tmp_box) img_crop_list.append(img_crop) - img_crop_list, angle_list = self._classify_text( - img_crop_list, angle_classification_thresh=angle_classification_thresh) + img_crop_list, angle_list = self._classify_text(img_crop_list, + angle_classification_thresh=angle_classification_thresh) rec_results = self._recognize_text(img_crop_list) # if the recognized text confidence score is lower than text_thresh, then drop it @@ -277,18 +288,23 @@ def serving_method(self, images, **kwargs): return results def save_result_image( - self, - original_image, - detection_boxes, - rec_results, - output_dir='ocr_result', - text_thresh=0.5, + self, + original_image, + detection_boxes, + rec_results, + output_dir='ocr_result', + text_thresh=0.5, ): image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) txts = [item[0] for item in rec_results] scores = [item[1] for item in rec_results] - draw_img = draw_ocr( - image, detection_boxes, txts, scores, font_file=self.font_file, draw_txt=True, drop_score=text_thresh) + draw_img = draw_ocr(image, + detection_boxes, + txts, + scores, + font_file=self.font_file, + draw_txt=True, + drop_score=text_thresh) if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -326,7 +342,7 @@ def _classify_text(self, image_list, angle_classification_thresh=0.9): norm_img_batch = norm_img_batch.copy() self.cls_input_tensor.copy_from_cpu(norm_img_batch) - self.cls_predictor.zero_copy_run() + self.cls_predictor.run() prob_out = self.cls_output_tensors[0].copy_to_cpu() label_out = self.cls_output_tensors[1].copy_to_cpu() @@ -370,7 +386,7 @@ def _recognize_text(self, img_list): norm_img_batch = norm_img_batch.copy() self.rec_input_tensor.copy_from_cpu(norm_img_batch) - self.rec_predictor.zero_copy_run() + self.rec_predictor.run() rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() rec_idx_lod = self.rec_output_tensors[0].lod()[0] @@ -411,60 +427,57 @@ def _save_recognizer_model(self, dirname, model_filename=None, params_filename=N if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') params_file_path = os.path.join(self.rec_pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.rec_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + 
target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') params_file_path = os.path.join(self.cls_pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self.cls_pretrained_model_path, model_filename=model_file_path, params_filename=params_file_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @runnable def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -474,20 +487,28 @@ def run_cmd(self, argvs): self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + results = self.recognize_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='ocr_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='ocr_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md b/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md index 695b8e3a6..eccf56885 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/README.md @@ -1,7 +1,7 @@ # chinese_text_detection_db_mobile |模型名称|chinese_text_detection_db_mobile| -| :--- | :---: | +| :--- | :---: | |类别|图像-文字识别| |网络|Differentiable Binarization| |数据集|icdar2015数据集| @@ -33,9 +33,9 @@ ## 二、安装 -- ### 1、环境依赖 +- ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 1.7.2 - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) @@ -46,7 +46,7 @@ - ```shell $ pip install shapely pyclipper ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** - ### 2、安装 @@ -85,7 +85,7 @@ - ```python __init__(enable_mkldnn=False) ``` - + - 构造ChineseTextDetectionDB对象 - **参数** @@ -100,7 +100,7 @@ box_thresh=0.5, visualization=False) ``` - + - 预测API,检测输入图片中的所有中文文本的位置。 - **参数** @@ -181,7 +181,11 @@ * 1.0.4 使用超轻量级的三阶段模型(文本框检测-角度分类-文字识别)识别图片文字。 - + +* 1.0.5 + + 移除 fluid api + - ```shell - $ hub install chinese_text_detection_db_mobile==1.0.4 + $ hub install chinese_text_detection_db_mobile==1.0.5 ``` diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py index aaae4aea1..c5e1b1b06 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py @@ -5,19 +5,23 @@ import argparse import ast +import base64 import math import os import time -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import base64 import cv2 import numpy as np -import paddle.fluid as fluid +import 
paddle +from paddle.inference import Config +from paddle.inference import create_predictor +from PIL import Image + import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving def base64_to_cv2(b64str): @@ -29,13 +33,14 @@ def base64_to_cv2(b64str): @moduleinfo( name="chinese_text_detection_db_mobile", - version="1.0.4", + version="1.0.5", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseTextDetectionDB(hub.Module): + def _initialize(self, enable_mkldnn=False): """ initialize with the necessary elements @@ -60,7 +65,7 @@ def _set_config(self): model_file_path = os.path.join(self.pretrained_model_path, 'model') params_file_path = os.path.join(self.pretrained_model_path, 'params') - config = AnalysisConfig(model_file_path, params_file_path) + config = Config(model_file_path, params_file_path) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -83,13 +88,13 @@ def _set_config(self): # use zero copy config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) + self.predictor = create_predictor(config) input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + self.input_tensor = self.predictor.get_input_handle(input_names[0]) output_names = self.predictor.get_output_names() self.output_tensors = [] for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) + output_tensor = self.predictor.get_output_handle(output_name) self.output_tensors.append(output_tensor) def read_images(self, paths=[]): @@ -218,7 +223,7 @@ def detect_text(self, else: im = im.copy() self.input_tensor.copy_from_cpu(im) - self.predictor.zero_copy_run() + self.predictor.run() outputs = [] for output_tensor in self.output_tensors: @@ -255,25 +260,23 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.Executor(place) model_file_path = os.path.join(self.pretrained_model_path, 'model') params_file_path = os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + program, feeded_var_names, target_vars = paddle.static.load_inference_model(dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def 
serving_method(self, images, **kwargs): @@ -289,11 +292,10 @@ def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -303,20 +305,28 @@ def run_cmd(self, argvs): self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.detect_text( - paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + results = self.detect_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/README.md b/modules/image/text_recognition/chinese_text_detection_db_server/README.md index c35ea1e99..b8c5882b8 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_server/README.md +++ b/modules/image/text_recognition/chinese_text_detection_db_server/README.md @@ -1,7 +1,7 @@ # chinese_text_detection_db_server |模型名称|chinese_text_detection_db_server| -| :--- | :---: | +| :--- | :---: | |类别|图像-文字识别| |网络|Differentiable Binarization| |数据集|icdar2015数据集| @@ -33,9 +33,9 @@ ## 二、安装 -- ### 1、环境依赖 +- ### 1、环境依赖 - - paddlepaddle >= 1.7.2 + - paddlepaddle >= 1.7.2 - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) @@ -46,7 +46,7 @@ - ```shell $ pip install shapely pyclipper ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** - ### 2、安装 @@ -100,7 +100,7 @@ visualization=False) ``` - 预测API,检测输入图片中的所有中文文本的位置。 - + - **参数** - paths (list\[str\]): 图片的路径; @@ -145,7 +145,7 @@ def cv2_to_base64(image): data = cv2.imencode('.jpg', image)[1] return base64.b64encode(data.tostring()).decode('utf8') - + # 发送HTTP请求 data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} headers = {"Content-type": "application/json"} @@ -170,7 +170,11 @@ * 1.0.2 增加更多预训练数据,更新预训练参数 - + +* 1.0.3 + + 移除 fluid api + - ```shell - $ hub install chinese_text_detection_db_server==1.0.2 + $ hub install chinese_text_detection_db_server==1.0.3 ``` diff --git 
a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py index 52295bef7..a2db44060 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_server/module.py +++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py @@ -5,19 +5,23 @@ import argparse import ast +import base64 import math import os import time -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import base64 import cv2 import numpy as np -import paddle.fluid as fluid +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor +from PIL import Image + import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving def base64_to_cv2(b64str): @@ -29,13 +33,14 @@ def base64_to_cv2(b64str): @moduleinfo( name="chinese_text_detection_db_server", - version="1.0.2", + version="1.0.3", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", author_email="paddle-dev@baidu.com", type="cv/text_recognition") class ChineseTextDetectionDBServer(hub.Module): + def _initialize(self, enable_mkldnn=False): """ initialize with the necessary elements @@ -60,7 +65,7 @@ def _set_config(self): model_file_path = os.path.join(self.pretrained_model_path, 'model') params_file_path = os.path.join(self.pretrained_model_path, 'params') - config = AnalysisConfig(model_file_path, params_file_path) + config = Config(model_file_path, params_file_path) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -80,13 +85,13 @@ def _set_config(self): # use zero copy config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.switch_use_feed_fetch_ops(False) - self.predictor = create_paddle_predictor(config) + self.predictor = create_predictor(config) input_names = self.predictor.get_input_names() - self.input_tensor = self.predictor.get_input_tensor(input_names[0]) + self.input_tensor = self.predictor.get_input_handle(input_names[0]) output_names = self.predictor.get_output_names() self.output_tensors = [] for output_name in output_names: - output_tensor = self.predictor.get_output_tensor(output_name) + output_tensor = self.predictor.get_output_handle(output_name) self.output_tensors.append(output_tensor) def read_images(self, paths=[]): @@ -202,7 +207,7 @@ def detect_text(self, im = im.copy() starttime = time.time() self.input_tensor.copy_from_cpu(im) - self.predictor.zero_copy_run() + self.predictor.run() data_out = self.output_tensors[0].copy_to_cpu() dt_boxes_list = postprocessor(data_out, [ratio_list]) boxes = self.filter_tag_det_res(dt_boxes_list[0], original_image.shape) @@ -229,25 +234,23 @@ def save_inference_model(self, dirname, model_filename=None, params_filename=Non if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) model_file_path = os.path.join(self.pretrained_model_path, 'model') params_file_path = 
os.path.join(self.pretrained_model_path, 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + program, feeded_var_names, target_vars = paddle.static.load_inference_model(dirname=self.pretrained_model_path, + model_filename=model_file_path, + params_filename=params_file_path, + executor=exe) + + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -263,11 +266,10 @@ def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -277,20 +279,28 @@ def run_cmd(self, argvs): self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.detect_text( - paths=[args.input_path], use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization) + results = self.detect_text(paths=[args.input_path], + use_gpu=args.use_gpu, + output_dir=args.output_dir, + visualization=args.visualization) return results def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") + self.arg_config_group.add_argument('--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ diff --git a/modules/text/language_model/albert-base-v1/README.md b/modules/text/language_model/albert-base-v1/README.md index b0f68e8b9..abef64ad5 100644 --- a/modules/text/language_model/albert-base-v1/README.md +++ b/modules/text/language_model/albert-base-v1/README.md @@ -25,9 +25,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 2.2.0 + - paddlepaddle >= 2.0.0 - - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 diff --git a/modules/text/language_model/simnet_bow/README.md b/modules/text/language_model/simnet_bow/README.md index 330dd0237..6b31e00be 100644 --- 
a/modules/text/language_model/simnet_bow/README.md +++ b/modules/text/language_model/simnet_bow/README.md @@ -1,6 +1,6 @@ # simnet_bow |模型名称|simnet_bow| -| :--- | :---: | +| :--- | :---: | |类别|文本-语义匹配| |网络|BOW| |数据集|百度自建数据集| @@ -59,7 +59,7 @@ # [{'text_1': '这道题太难了', 'text_2': '这道题是上一年的考题', 'similarity': 0.689}, {'text_1': '这道题太难了', 'text_2': '这道题不简单', 'similarity': 0.855}, {'text_1': '这道题太难了', 'text_2': '这道题很有意思', 'similarity': 0.8166}] ``` - + - ### 3、 API - ```python @@ -153,3 +153,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 + +* 1.2.1 + + 移除 fluid api + + - ```shell + $ hub install simnet_bow==1.2.1 + ``` diff --git a/modules/text/language_model/simnet_bow/module.py b/modules/text/language_model/simnet_bow/module.py index ef4a49935..48145a6dd 100644 --- a/modules/text/language_model/simnet_bow/module.py +++ b/modules/text/language_model/simnet_bow/module.py @@ -8,35 +8,39 @@ import json import math import os -import six import numpy as np -import paddle.fluid as fluid -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import six +from paddle.inference import Config +from paddle.inference import create_predictor +from simnet_bow.processor import load_vocab +from simnet_bow.processor import postprocess +from simnet_bow.processor import preprocess + import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix, get_variable_info +from paddlehub.common.paddle_helper import add_vars_prefix +from paddlehub.common.paddle_helper import get_variable_info from paddlehub.common.utils import sys_stdin_encoding from paddlehub.io.parser import txt_parser -from paddlehub.module.module import serving from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable - -from simnet_bow.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import serving class DataFormatError(Exception): + def __init__(self, *args): self.args = args -@moduleinfo( - name="simnet_bow", - version="1.2.0", - summary="Baidu's open-source similarity network model based on bow_pairwise.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="simnet_bow", + version="1.2.1", + summary="Baidu's open-source similarity network model based on bow_pairwise.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SimnetBow(hub.Module): + def _initialize(self): """ initialize with the necessary elements @@ -62,11 +66,11 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.pretrained_model_path) + cpu_config = Config(self.pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -75,125 +79,18 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.pretrained_model_path) + gpu_config = Config(self.pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained simnet_bow - - Args: - trainable(bool): whether fine-tune the pretrained parameters of simnet_bow or not。 - max_seq_len (int): It will limit the total sequence returned so 
that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). + self.gpu_predictor = create_predictor(gpu_config) - Returns: - inputs(dict): the input variables of simnet_bow (words) - outputs(dict): the output variables of input words (word embeddings) and sequence lenght of the first input_text - main_program(Program): the main_program of simnet_bow with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="emb", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 500002 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
- def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - - def texts2tensor(self, texts): - """ - Tranform the texts(dict) to PaddleTensor - Args: - texts(list): texts - Returns: - tensor(PaddleTensor): tensor with texts data - """ + def _texts_process(self, texts): lod = [0] data = [] for i, text in enumerate(texts): data += text['processed'] lod.append(len(text['processed']) + lod[i]) - tensor = PaddleTensor(np.array(data).astype('int64')) - tensor.name = "words" - tensor.lod = [lod] - tensor.shape = [lod[-1], 1] - return tensor + return np.array(data).astype('int64'), [lod], [lod[-1], 1] def to_unicode(self, texts): """ @@ -282,14 +179,28 @@ def similarity(self, texts=[], data={}, use_gpu=False, batch_size=1): start_idx = start_idx + batch_size processed_results = preprocess(self.word_seg_module, self.vocab, batch_data, use_gpu, batch_size) - tensor_words_1 = self.texts2tensor(processed_results["text_1"]) - tensor_words_2 = self.texts2tensor(processed_results["text_2"]) + data_1, lod_1, shape_1 = self._texts_process(processed_results["text_1"]) + data_2, lod_2, shape_2 = self._texts_process(processed_results["text_2"]) - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words_1, tensor_words_2]) - else: - batch_out = self.cpu_predictor.run([tensor_words_1, tensor_words_2]) - batch_result = postprocess(batch_out[1], processed_results) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(data_1) + input_handle.set_lod(lod_1) + input_handle.reshape(shape_1) + + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(data_2) + input_handle.set_lod(lod_2) + input_handle.reshape(shape_2) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + batch_out = output_handle.copy_to_cpu() + + batch_result = postprocess(batch_out, processed_results) results += batch_result return results @@ -298,8 +209,10 @@ def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the simnet_bow module.", prog='hub run simnet_bow', usage='%(prog)s', add_help=True) + self.parser = argparse.ArgumentParser(description="Run the simnet_bow module.", + prog='hub run simnet_bow', + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -324,8 +237,10 @@ def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") diff --git a/modules/text/language_model/simnet_bow/processor.py b/modules/text/language_model/simnet_bow/processor.py index a4362e854..b932a0391 100644 --- a/modules/text/language_model/simnet_bow/processor.py +++ b/modules/text/language_model/simnet_bow/processor.py @@ -46,12 +46,11 @@ def preprocess(lac, word_dict, data_dict, use_gpu=False, batch_size=1): return result -def postprocess(predict_out, data_info): +def postprocess(pred, data_info): """ Convert model's output tensor to pornography label """ result = [] - pred = predict_out.as_ndarray() for index in range(len(data_info[text_a_key])): result_i = {} result_i[text_a_key] = data_info[text_a_key][index]['origin'] diff --git a/modules/text/lexical_analysis/jieba_paddle/README.md b/modules/text/lexical_analysis/jieba_paddle/README.md index 9f8be4493..8d141131f 100644 --- a/modules/text/lexical_analysis/jieba_paddle/README.md +++ b/modules/text/lexical_analysis/jieba_paddle/README.md @@ -1,7 +1,7 @@ # jieba_paddle |模型名称|jieba_paddle| -| :--- | :---: | +| :--- | :---: | |类别|文本-词法分析| |网络|BiGRU+CRF| |数据集|百度自建数据集| @@ -27,7 +27,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -59,10 +59,10 @@ import paddlehub as hub jieba = hub.Module(name="jieba_paddle") - + results = jieba.cut("今天是个好日子", cut_all=False, HMM=True) print(results) - + # ['今天', '是', '个', '好日子'] ``` @@ -89,7 +89,7 @@ - ```python def cut_for_search(sentence, HMM=True) ``` - + - jieba的搜索引擎模式切词,该方法适合用于搜索引擎构建倒排索引的分词,粒度比较细 - **参数** @@ -212,7 +212,12 @@ * 1.0.0 - 初始发布 + 初始发布 + +* 1.0.1 + + 移除 fluid api + - ```shell - $ hub install jieba_paddle==1.0.0 + $ hub install jieba_paddle==1.0.1 ``` diff --git a/modules/text/lexical_analysis/jieba_paddle/module.py b/modules/text/lexical_analysis/jieba_paddle/module.py index c4ed5844f..45b0bacd3 100644 --- a/modules/text/lexical_analysis/jieba_paddle/module.py +++ b/modules/text/lexical_analysis/jieba_paddle/module.py @@ -6,21 +6,22 @@ import logging import os -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving @moduleinfo( name="jieba_paddle", - version="1.0.0", + version="1.0.1", summary= "jieba_paddle is a chineses tokenizer using BiGRU base on the PaddlePaddle deeplearning framework. 
More information please refer to https://github.com/fxsjy/jieba.", author="baidu-paddle", author_email="paddle-dev@gmail.com", type="nlp/lexical_analysis") class JiebaPaddle(hub.Module): + def _initialize(self): pass @@ -119,8 +120,11 @@ def extract_tags(self, sentence, topK=20, withWeight=False, allowPOS=(), withFla import jieba import jieba.analyse jieba.setLogLevel(logging.ERROR) - res = jieba.analyse.extract_tags( - sentence, topK=topK, withWeight=withWeight, allowPOS=allowPOS, withFlag=withFlag) + res = jieba.analyse.extract_tags(sentence, + topK=topK, + withWeight=withWeight, + allowPOS=allowPOS, + withFlag=withFlag) return res def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn', 'v'), withFlag=False): @@ -143,24 +147,3 @@ def textrank(self, sentence, topK=20, withWeight=False, allowPOS=('ns', 'n', 'vn jieba.setLogLevel(logging.ERROR) res = jieba.analyse.textrank(sentence, topK=topK, withWeight=withWeight, allowPOS=allowPOS, withFlag=withFlag) return res - - -if __name__ == "__main__": - jb_pd = JiebaPaddle() - res = jb_pd.cut( - sentence="我来到北京清华大学", - use_paddle=True, - ) - print(res) - res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=True) - print(res) - res = jb_pd.cut(sentence="我来到北京清华大学", use_paddle=False, cut_all=False) - print(res) - res = jb_pd.cut_for_search(sentence="我来到北京清华大学") - print(res) - res = jb_pd.extract_tags(sentence="我来到北京清华大学") - print(res) - res = jb_pd.extract_tags(sentence="我来到北京清华大学", withWeight=True) - print(res) - res = jb_pd.textrank(sentence="我来到北京清华大学", withWeight=True) - print(res) diff --git a/modules/text/lexical_analysis/lac/README.md b/modules/text/lexical_analysis/lac/README.md index 9584b957b..d94c875e3 100644 --- a/modules/text/lexical_analysis/lac/README.md +++ b/modules/text/lexical_analysis/lac/README.md @@ -1,7 +1,7 @@ # lac |模型名称|lac| -| :--- | :---: | +| :--- | :---: | |类别|文本-词法分析| |网络|BiGRU+CRF| |数据集|百度自建数据集| @@ -17,7 +17,7 @@ - ### 模型介绍 - Lexical Analysis of Chinese,简称 LAC,是一个联合的词法分析模型,能整体性地完成中文分词、词性标注、专名识别任务。在百度自建数据集上评测,LAC效果:Precision=88.0%,Recall=88.7%,F1-Score=88.4%。该PaddleHub Module支持预测。 - +


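
Beyond the fluid removal, the lac hunks further down also drop the `use_device` argument from `cut()` and `lexical_analysis()`; device selection now follows `use_gpu` plus `CUDA_VISIBLE_DEVICES` only. A hedged usage sketch against the bumped 2.2.1 interface (output as in the README example below):

```python
import paddlehub as hub

lac = hub.Module(name="lac")  # 2.2.1 after this change
# use_device is gone; pass use_gpu=True only when CUDA_VISIBLE_DEVICES is set.
words = lac.cut(text="今天是个好日子", use_gpu=False, batch_size=1, return_tag=False)
print(words)  # ['今天', '是', '个', '好日子']
```
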
@@ -32,15 +32,15 @@ - ### 1、环境依赖 - paddlepaddle >= 1.6.2 - + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - - - 若使用词典干预功能,额外依赖第三方库 pyahocorasick - + + - 若使用词典干预功能,额外依赖第三方库 pyahocorasick + - ```shell $ pip install pyahocorasick ``` - + - ### 2、安装 - ```shell @@ -63,23 +63,23 @@ - ```shell $ hub run lac --input_file test.txt --user_dict user.dict ``` - + - test.txt 存放待分词文本, 如: - ```shell - 今天是个好日子 - 今天天气晴朗 - ``` + 今天是个好日子 + 今天天气晴朗 + ``` - user.dict 为用户自定义词典,可以不指定,当指定自定义词典时,可以干预默认分词结果。如: - ```shell 春天/SEASON 花/n 开/v 秋天的风 落 阳 - ``` + ``` - 词典文件每行表示一个定制化的item,由一个单词或多个连续的单词组成,每个单词后使用'/'表示标签,如果没有'/'标签则会使用模型默认的标签。每个item单词数越多,干预效果会越精准。 - Note:该PaddleHub Module使用词典干预功能时,依赖于第三方库pyahocorasick,请自行安装 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -95,7 +95,7 @@ for result in results: print(result['word']) print(result['tag']) - + # ['今天', '是', '个', '好日子'] # ['TIME', 'v', 'q', 'n'] # ['天气预报', '说', '今天', '要', '下雨'] @@ -103,9 +103,9 @@ # ['下', '一班', '地铁', '马上', '就要', '到', '了'] # ['f', 'm', 'n', 'd', 'v', 'v', 'xc'] ``` - - - + + + - ### 3、API - ```python @@ -135,7 +135,7 @@ - ```python def lexical_analysis(texts=[], data={}, use_gpu=False, batch_size=1, user_dict=None, return_tag=True) ``` - + - **该接口将会在未来版本被废弃,如有需要,请使用cut接口预测** - lac预测接口,预测输入句子的分词结果 @@ -282,6 +282,11 @@ * 2.2.0 升级自定义词典功能,支持增加不属于lac默认提供的词性 + +* 2.2.1 + + 移除 fluid api + - ```shell - $ hub install lac==2.2.0 + $ hub install lac==2.2.1 ``` diff --git a/modules/text/lexical_analysis/lac/module.py b/modules/text/lexical_analysis/lac/module.py index 40136fe63..df6993761 100644 --- a/modules/text/lexical_analysis/lac/module.py +++ b/modules/text/lexical_analysis/lac/module.py @@ -9,11 +9,14 @@ import json import math import os -import six import numpy as np -import paddle.fluid as fluid - +import paddle +import six +from lac.custom import Customization +from lac.processor import load_kv_dict +from lac.processor import parse_result +from lac.processor import word_to_ids from paddle.inference import Config from paddle.inference import create_predictor @@ -22,27 +25,27 @@ from paddlehub.common.paddle_helper import add_vars_prefix from paddlehub.common.utils import sys_stdin_encoding from paddlehub.io.parser import txt_parser -from paddlehub.module.module import moduleinfo, runnable, serving - -from lac.network import lex_net -from lac.processor import load_kv_dict, word_to_ids, parse_result -from lac.custom import Customization +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving class DataFormatError(Exception): + def __init__(self, *args): self.args = args @moduleinfo( name="lac", - version="2.2.0", + version="2.2.1", summary= "Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition", author="baidu-nlp", author_email="paddle-dev@baidu.com", type="nlp/lexical_analysis") class LAC(hub.Module): + def _initialize(self, user_dict=None): """ initialize with the necessary elements @@ -65,129 +68,27 @@ def _initialize(self, user_dict=None): self._set_config() - def _get_device_id(self, places): - try: - places = os.environ[places] - id = int(places) - except: - id = -1 - return id - def _set_config(self): """ predictor config setting """ - - # create default cpu predictor cpu_config = Config(self.pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() 
self.cpu_predictor = create_predictor(cpu_config) - # create predictors using various types of devices - - # npu - npu_id = self._get_device_id("FLAGS_selected_npus") - if npu_id != -1: - # use npu - npu_config = Config(self.pretrained_model_path) - npu_config.disable_glog_info() - npu_config.enable_npu(device_id=npu_id) - self.npu_predictor = create_predictor(npu_config) - - # gpu - gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") - if gpu_id != -1: - # use gpu + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: gpu_config = Config(self.pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) self.gpu_predictor = create_predictor(gpu_config) - # xpu - xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") - if xpu_id != -1: - # use xpu - xpu_config = Config(self.pretrained_model_path) - xpu_config.disable_glog_info() - xpu_config.enable_xpu(100) - self.xpu_predictor = create_predictor(xpu_config) - - def _internal_predict(self, predictor, texts): - """ - Tranform the texts(list) to Tensor and then do "real predict" - Args: - texts(list): texts - Returns: - result(PaddleInferTensor): predict output - """ - - # texts to data and lod - lod = [0] - data = [] - for i, text in enumerate(texts): - text_inds = word_to_ids(text, self.word2id_dict, self.word_replace_dict, oov_id=self.oov_id) - data += text_inds - lod.append(len(text_inds) + lod[i]) - - # get predictor tensor - input_names = predictor.get_input_names() - input_tensor = predictor.get_input_handle(input_names[0]) - - # set data, shape and lod - input_tensor.copy_from_cpu(np.array(data).astype('int64')) - input_tensor.reshape([lod[-1], 1]) - input_tensor.set_lod([lod]) - - # real predict - predictor.run() - output_names = predictor.get_output_names() - output_handle = predictor.get_output_handle(output_names[0]) - - return output_handle - - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained lac - - Args: - trainable(bool): whether fine-tune the pretrained parameters of lac or not - - Returns: - inputs(dict): the input variables of lac (words) - outputs(dict): the output variables of lac (the word segmentation results) - main_program(Program): the main_program of lac with pretrained prameters - """ - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - crf_decode, word, fc = lex_net(self.word_dict_len, self.label_dict_len) - word_name = word.name - pred_name = crf_decode.name - fc_name = fc.name - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the lac pretrained model - def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {"words": main_program.global_block().vars[prefix_name + word_name]} - outputs = { - "predicted": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - return inputs, outputs, main_program - def set_user_dict(self, 
dict_path, sep=None): """ Set the costomized dictionary if you wanna exploit the self-defined dictionary @@ -230,6 +131,22 @@ def to_unicode(self, texts): texts = unicode_texts return texts + def preprocess(self, texts): + """ + Transform the texts(list) to the input data, lod and shape for the predictor + Args: + texts(list): texts + Returns: + np.array, list, list + """ + lod = [0] + data = [] + for i, text in enumerate(texts): + text_inds = word_to_ids(text, self.word2id_dict, self.word_replace_dict, oov_id=self.oov_id) + data += text_inds + lod.append(len(text_inds) + lod[i]) + return np.array(data).astype('int64'), [lod], [lod[-1], 1] + def _get_index(self, data_list, item=""): """ find all indexes of item in data_list @@ -241,7 +158,7 @@ return res @serving - def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=None): + def cut(self, text, use_gpu=False, batch_size=1, return_tag=True): """ The main function that segments an entire text that contains Chinese characters into separated words. @@ -250,32 +167,20 @@ def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=Non use_gpu(bool): whether use gpu to predict or not batch_size(int): the program deals once with one batch return_tag: Whether to get tag or not. - use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: results(dict or list): The word segmentation result of the input text, whose key is 'word', if text is a list. If text is a str, the word segmentation result (list) is obtained. """ - - # real predictor to use - if use_device is not None: - if use_device == "cpu": - predictor = self.cpu_predictor - elif use_device == "xpu": - predictor = self.xpu_predictor - elif use_device == "npu": - predictor = self.npu_predictor - elif use_device == "gpu": - predictor = self.gpu_predictor - else: - raise Exception("Unsupported device: " + use_device) - else: - # use_device is not set, therefore follow use_gpu - if use_gpu: - predictor = self.gpu_predictor - else: - predictor = self.cpu_predictor + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+ ) if isinstance(text, list) and len(text) != 0: @@ -295,8 +200,20 @@ def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=Non batch_data = predicted_data[start_idx:] start_idx = start_idx + batch_size - batch_out = self._internal_predict(predictor, batch_data) - batch_result = parse_result(batch_data, batch_out, self.id2label_dict, interventer=self.custom) + data, lod, shape = self.preprocess(batch_data) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(data) + input_handle.set_lod(lod) + input_handle.reshape(shape) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + batch_result = parse_result(batch_data, output_handle, self.id2label_dict, interventer=self.custom) results += batch_result for index in empty_str_indexes: @@ -309,8 +226,20 @@ def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=Non return results elif isinstance(text, str) and text != "": - batch_out = self._internal_predict(predictor, [text]) - batch_result = parse_result([text], batch_out, self.id2label_dict, interventer=self.custom) + data, lod, shape = self.preprocess([text]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(data) + input_handle.set_lod(lod) + input_handle.reshape(shape) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + batch_result = parse_result([text], output_handle, self.id2label_dict, interventer=self.custom) return batch_result[0]['word'] elif text == "": @@ -318,7 +247,7 @@ def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=Non else: raise TypeError("The input data is inconsistent with expectations.") - def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, return_tag=True, use_device=None): + def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, return_tag=True): """ Get the word segmentation results with the texts as input @@ -328,30 +257,19 @@ def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, retur use_gpu(bool): whether use gpu to predict or not batch_size(int): the program deals once with one batch return_tag: Whether to get tag or not. - use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: results(list): the word segmentation results """ - # real predictor to use - if use_device is not None: - if use_device == "cpu": - predictor = self.cpu_predictor - elif use_device == "xpu": - predictor = self.xpu_predictor - elif use_device == "npu": - predictor = self.npu_predictor - elif use_device == "gpu": - predictor = self.gpu_predictor - else: - raise Exception("Unsupported device: " + use_device) - else: - # use_device is not set, therefore follow use_gpu - if use_gpu: - predictor = self.gpu_predictor - else: - predictor = self.cpu_predictor + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) if texts != [] and isinstance(texts, list) and data == {}: predicted_data = texts @@ -376,8 +294,20 @@ def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, retur batch_data = predicted_data[start_idx:] start_idx = start_idx + batch_size - batch_out = self._internal_predict(predictor, batch_data) - batch_result = parse_result(batch_data, batch_out, self.id2label_dict, interventer=self.custom) + data, lod, shape = self.preprocess(batch_data) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(data) + input_handle.set_lod(lod) + input_handle.reshape(shape) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + batch_result = parse_result(batch_data, output_handle, self.id2label_dict, interventer=self.custom) results += batch_result for index in empty_str_indexes: @@ -421,8 +351,7 @@ def run_cmd(self, argvs): results = self.lexical_analysis(texts=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size, - return_tag=args.return_tag, - use_device=args.use_device) + return_tag=args.return_tag) return results @@ -458,9 +387,6 @@ def add_module_config_arg(self): type=ast.literal_eval, default=True, help="whether return tags of results or not") - self.arg_config_group.add_argument('--use_device', - choices=["cpu", "gpu", "xpu", "npu"], - help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ @@ -489,30 +415,3 @@ def check_input_data(self, args): raise DataFormatError return input_data - - -if __name__ == '__main__': - lac = LAC(user_dict="user.dict") - # or use the fuction user_dict to set - # lac.set_user_dict("user.dict") - - test_text = ["今天是个好日子", "天气预报说今天要下雨", "", "下一班地铁马上就要到了", "", "调料份量不能多,也不能少,味道才能正好", "", "", "春天的花开秋天的风以及冬天的落阳"] - - # execute predict and print the result - results = lac.cut(text=test_text, use_gpu=True, batch_size=7, return_tag=True) - for result in results: - if six.PY2: - print(json.dumps(result['word'], encoding="utf8", ensure_ascii=False)) - print(json.dumps(result['tag'], encoding="utf8", ensure_ascii=False)) - else: - print(result['word']) - print(result['tag']) - - # delete the costomized dictionary - lac.del_user_dict() - - results = lac.cut(text="春天的花开秋天的风以及冬天的落阳", use_gpu=False, batch_size=1, return_tag=False) - print(results) - - # get the tags that was exploited as pretraining lac - print(lac.get_tags()) diff --git a/modules/text/lexical_analysis/lac/processor.py b/modules/text/lexical_analysis/lac/processor.py index 1521182ee..270c95dcd 100644 --- a/modules/text/lexical_analysis/lac/processor.py +++ b/modules/text/lexical_analysis/lac/processor.py @@ -1,11 +1,13 @@ # -*- coding:utf-8 -*- import io -import numpy as np import os + +import numpy as np import six class Query(object): + def __init__(self, lac_query): self.set_query(lac_query) @@ -35,6 +37,7 @@ def set_query(self, lac_query): class Bound(object): + def __init__(self, start_index=0, end_index=0, left_bound=0, right_bound=0, left_char_bound=0, right_char_bound=0): self.start_index = start_index # 命中的词的起始位置,char级别 self.end_index = end_index # 命中的词的结束位置,char级别 @@ -45,6 +48,7 @@ def __init__(self, start_index=0, end_index=0, left_bound=0, right_bound=0, left class Interventer(object): + def __init__(self, ngram_dict_path, user_dict_path): self.ngram_dict_path = ngram_dict_path self.user_dict_path = 
user_dict_path diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md index dc23a5dfb..05d9a441d 100644 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md @@ -1,6 +1,6 @@ # ernie_skep_sentiment_analysis |模型名称|ernie_skep_sentiment_analysis| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|SKEP| |数据集|百度自建数据集| @@ -29,13 +29,13 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.7.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 - ```shell - $ hub install ernie_skep_sentiment_analysis==1.0.0 + $ hub install ernie_skep_sentiment_analysis ``` - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) @@ -49,7 +49,7 @@ - ```shell $ hub run ernie_skep_sentiment_analysis --input_text='虽然小明很努力,但是他还是没有考100分' ``` - + - ### 2、预测代码示例 - ```python @@ -61,13 +61,13 @@ # Predict sentiment label test_texts = ['你不是不聪明,而是不认真', '虽然小明很努力,但是他还是没有考100分'] results = module.predict_sentiment(test_texts, use_gpu=False) - + for result in results: print(result['text']) print(result['sentiment_label']) print(result['positive_probs']) print(result['negative_probs']) - + # 你不是不聪明,而是不认真 negative 0.10738129168748856 0.8926186561584473 # 虽然小明很努力,但是他还是没有考100分 negative 0.05391530692577362 0.94608473777771 ``` @@ -137,7 +137,7 @@ headers = {"Content-type": "application/json"} url = "http://127.0.0.1:8866/predict/ernie_skep_sentiment_analysis" r = requests.post(url=url, headers=headers, data=json.dumps(data)) - + # 打印预测结果 print(r.json()["results"]) ``` @@ -147,7 +147,11 @@ * 1.0.0 初始发布 - + +* 1.0.1 + + 移除 fluid api + - ```shell - $ hub install ernie_skep_sentiment_analysis==1.0.0 + $ hub install ernie_skep_sentiment_analysis==1.0.1 ``` diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py index cc643efc5..52790fce7 100644 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/ernie.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
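# Note: the fluid-based ErnieModel defined below is removed by this patch; prediction in
# module.py now loads the exported inference model directly instead of rebuilding the
# network. A minimal sketch of that loading path, with illustrative file names (the
# module derives the real paths itself):
#
#     from paddle.framework import core
#
#     config = core.AnalysisConfig("infer_model/model", "infer_model/params")  # illustrative paths
#     config.disable_glog_info()
#     predictor = core.create_paddle_predictor(config)
#     # input arrays are wrapped with core.PaddleTensor(...) before being fed to the
#     # predictor (see array2tensor() in module.py)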
"""ERNIE""" - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -22,261 +21,8 @@ import json import logging -import paddle.fluid as fluid import six -from .transformer_encoder import encoder, pre_process_layer -from .transformer_encoder import gelu - - -class ErnieModel(object): - """ - ErnieModel - """ - - def __init__(self, src_ids, position_ids, sentence_ids, input_mask, config, weight_sharing=True, use_fp16=False): - """ - :param src_ids: - :param position_ids: - :param sentence_ids: - :param input_mask: - :param config: - :param weight_sharing: - :param use_fp16: - """ - self._hidden_size = config.get('hidden_size', 768) - self._emb_size = config.get('emb_size', self._hidden_size) - self._n_layer = config.get('num_hidden_layers', 12) - self._n_head = config.get('num_attention_heads', 12) - self._voc_size = config.get('vocab_size', 30522) - self._max_position_seq_len = config.get('max_position_embeddings', 512) - self._param_share = config.get('param_share', "normal") - self._pre_encoder_cmd = config.get('pre_encoder_cmd', "nd") - self._preprocess_cmd = config.get('preprocess_cmd', "") - self._postprocess_cmd = config.get('postprocess_cmd', "dan") - self._epsilon = config.get('epsilon', 1e-05) - self._emb_mapping_in = config.get('emb_mapping_in', False) - self._n_layer_per_block = config.get('n_layer_per_block', 1) - - if config.has('sent_type_vocab_size'): - self._sent_types = config['sent_type_vocab_size'] - else: - self._sent_types = config.get('type_vocab_size', 2) - - self._use_sentence_id = config.get('use_sentence_id', True) - self._use_task_id = config.get('use_task_id', False) - if self._use_task_id: - self._task_types = config.get('task_type_vocab_size', 3) - self._hidden_act = config.get('hidden_act', 'gelu') - self._prepostprocess_dropout = config.get('hidden_dropout_prob', 0.1) - self._attention_dropout = config.get('attention_probs_dropout_prob', 0.1) - self._weight_sharing = weight_sharing - - self._word_emb_name = "word_embedding" - self._pos_emb_name = "pos_embedding" - self._sent_emb_name = "sent_embedding" - self._task_emb_name = "task_embedding" - self._dtype = "float16" if use_fp16 else "float32" - self._emb_dtype = "float32" - # Initialize all weigths by truncated normal initializer, and all biases - # will be initialized by constant zero by default. 
- self._param_initializer = fluid.initializer.TruncatedNormal(scale=config.get('initializer_range', 0.02)) - - self._build_model(src_ids, position_ids, sentence_ids, input_mask) - - def _build_model(self, src_ids, position_ids, sentence_ids, input_mask): - """ - :param src_ids: - :param position_ids: - :param sentence_ids: - :param input_mask: - :return: - """ - # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( - input=src_ids, - dtype=self._emb_dtype, - size=[self._voc_size, self._emb_size], - param_attr=fluid.ParamAttr(name=self._word_emb_name, initializer=self._param_initializer), - is_sparse=False) - - position_emb_out = fluid.layers.embedding( - input=position_ids, - dtype=self._emb_dtype, - size=[self._max_position_seq_len, self._emb_size], - param_attr=fluid.ParamAttr(name=self._pos_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + position_emb_out - - if self._use_sentence_id: - sent_emb_out = fluid.layers.embedding( - sentence_ids, - dtype=self._emb_dtype, - size=[self._sent_types, self._emb_size], - param_attr=fluid.ParamAttr(name=self._sent_emb_name, initializer=self._param_initializer)) - - emb_out = emb_out + sent_emb_out - - emb_out = pre_process_layer( - emb_out, self._pre_encoder_cmd, self._prepostprocess_dropout, name='pre_encoder', epsilon=self._epsilon) - - if self._emb_mapping_in: - emb_out = fluid.layers.fc( - input=emb_out, - num_flatten_dims=2, - size=self._hidden_size, - param_attr=fluid.ParamAttr(name='emb_hidden_mapping', initializer=self._param_initializer), - bias_attr='emb_hidden_mapping_bias') - - if self._dtype == "float16": - emb_out = fluid.layers.cast(x=emb_out, dtype=self._dtype) - input_mask = fluid.layers.cast(x=input_mask, dtype=self._dtype) - self_attn_mask = fluid.layers.matmul(x=input_mask, y=input_mask, transpose_y=True) - - self_attn_mask = fluid.layers.scale(x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False) - n_head_self_attn_mask = fluid.layers.stack(x=[self_attn_mask] * self._n_head, axis=1) - n_head_self_attn_mask.stop_gradient = True - - self._enc_out, self._checkpoints = encoder( - enc_input=emb_out, - attn_bias=n_head_self_attn_mask, - n_layer=self._n_layer, - n_head=self._n_head, - d_key=self._hidden_size // self._n_head, - d_value=self._hidden_size // self._n_head, - d_model=self._hidden_size, - d_inner_hid=self._hidden_size * 4, - prepostprocess_dropout=self._prepostprocess_dropout, - attention_dropout=self._attention_dropout, - relu_dropout=0, - hidden_act=self._hidden_act, - preprocess_cmd=self._preprocess_cmd, - postprocess_cmd=self._postprocess_cmd, - param_initializer=self._param_initializer, - name='encoder', - param_share=self._param_share, - epsilon=self._epsilon, - n_layer_per_block=self._n_layer_per_block) - if self._dtype == "float16": - self._enc_out = fluid.layers.cast(x=self._enc_out, dtype=self._emb_dtype) - - def get_sequence_output(self): - """ - :return: - """ - return self._enc_out - - def get_pooled_output(self): - """Get the first feature of each sequence for classification""" - next_sent_feat = fluid.layers.slice(input=self._enc_out, axes=[1], starts=[0], ends=[1]) - """ - if self._dtype == "float16": - next_sent_feat = fluid.layers.cast( - x=next_sent_feat, dtype=self._emb_dtype) - - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._emb_size, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), - bias_attr="mask_lm_trans_fc.b_0") - """ - """ - next_sent_feat = fluid.layers.fc( - 
input=next_sent_feat, - size=self._emb_size, - param_attr=fluid.ParamAttr( - name="mask_lm_trans_fc.w_0", initializer=self._param_initializer), - bias_attr="mask_lm_trans_fc.b_0") - - """ - next_sent_feat = fluid.layers.fc( - input=next_sent_feat, - size=self._hidden_size, - act="tanh", - param_attr=fluid.ParamAttr(name="pooled_fc.w_0", initializer=self._param_initializer), - bias_attr="pooled_fc.b_0") - return next_sent_feat - - def get_lm_output(self, mask_label, mask_pos): - """Get the loss & accuracy for pretraining""" - mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') - # extract the first token feature in each sentence - self.next_sent_feat = self.get_pooled_output() - reshaped_emb_out = fluid.layers.reshape(x=self._enc_out, shape=[-1, self._hidden_size]) - # extract masked tokens' feature - mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos) - - if self._dtype == "float16": - mask_feat = fluid.layers.cast(x=mask_feat, dtype=self._emb_dtype) - - # transform: fc - if self._hidden_act == 'gelu' or self._hidden_act == 'gelu.precise': - _hidden_act = 'gelu' - elif self._hidden_act == 'gelu.approximate': - _hidden_act = None - else: - _hidden_act = self._hidden_act - mask_trans_feat = fluid.layers.fc( - input=mask_feat, - size=self._emb_size, - act=_hidden_act, - param_attr=fluid.ParamAttr(name='mask_lm_trans_fc.w_0', initializer=self._param_initializer), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0')) - if self._hidden_act == 'gelu.approximate': - mask_trans_feat = gelu(mask_trans_feat) - else: - pass - # transform: layer norm - mask_trans_feat = fluid.layers.layer_norm( - mask_trans_feat, - begin_norm_axis=len(mask_trans_feat.shape) - 1, - param_attr=fluid.ParamAttr( - name='mask_lm_trans_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr(name='mask_lm_trans_layer_norm_bias', initializer=fluid.initializer.Constant(1.))) - # transform: layer norm - # mask_trans_feat = pre_process_layer( - # mask_trans_feat, 'n', name='mask_lm_trans') - - mask_lm_out_bias_attr = fluid.ParamAttr( - name="mask_lm_out_fc.b_0", initializer=fluid.initializer.Constant(value=0.0)) - if self._weight_sharing: - fc_out = fluid.layers.matmul( - x=mask_trans_feat, - y=fluid.default_main_program().global_block().var(self._word_emb_name), - transpose_y=True) - fc_out += fluid.layers.create_parameter( - shape=[self._voc_size], dtype=self._emb_dtype, attr=mask_lm_out_bias_attr, is_bias=True) - - else: - fc_out = fluid.layers.fc( - input=mask_trans_feat, - size=self._voc_size, - param_attr=fluid.ParamAttr(name="mask_lm_out_fc.w_0", initializer=self._param_initializer), - bias_attr=mask_lm_out_bias_attr) - - mask_lm_loss = fluid.layers.softmax_with_cross_entropy(logits=fc_out, label=mask_label) - mean_mask_lm_loss = fluid.layers.mean(mask_lm_loss) - - return mean_mask_lm_loss - - def get_task_output(self, task, task_labels): - """ - :param task: - :param task_labels: - :return: - """ - task_fc_out = fluid.layers.fc( - input=self.next_sent_feat, - size=task["num_labels"], - param_attr=fluid.ParamAttr(name=task["task_name"] + "_fc.w_0", initializer=self._param_initializer), - bias_attr=task["task_name"] + "_fc.b_0") - task_loss, task_softmax = fluid.layers.softmax_with_cross_entropy( - logits=task_fc_out, label=task_labels, return_softmax=True) - task_acc = fluid.layers.accuracy(input=task_softmax, label=task_labels) - mean_task_loss = fluid.layers.mean(task_loss) - return mean_task_loss, task_acc - class ErnieConfig(object): """parse ernie 
config""" diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py deleted file mode 100644 index 6c562795e..000000000 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/model/transformer_encoder.py +++ /dev/null @@ -1,450 +0,0 @@ -# -*- coding:utf-8 -** -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transformer encoder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from functools import partial - -import paddle.fluid as fluid -import paddle.fluid.layers as layers -import numpy as np - - -def gelu(x): - """Gaussian Error Linear Unit. - - This is a smoother version of the RELU. - Original paper: https://arxiv.org/abs/1606.08415 - Args: - x: float Tensor to perform activation. - - Returns: - `x` with the GELU activation applied. - """ - cdf = 0.5 * (1.0 + fluid.layers.tanh((np.sqrt(2.0 / np.pi) * (x + 0.044715 * fluid.layers.pow(x, 3.0))))) - return x * cdf - - -def multi_head_attention(queries, - keys, - values, - attn_bias, - d_key, - d_value, - d_model, - n_head=1, - dropout_rate=0., - cache=None, - param_initializer=None, - name='multi_head_att'): - """ - Multi-Head Attention. Note that attn_bias is added to the logit before - computing softmax activiation to mask certain selected positions so that - they will not considered in attention weights. - """ - keys = queries if keys is None else keys - values = keys if values is None else values - if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): - raise ValueError( - "Inputs: quries, keys and values should all be 3-D tensors. but {} v.s. {} v.s. {}"\ - .format(queries.shape, keys.shape, values.shape)) - - def __compute_qkv(queries, keys, values, n_head, d_key, d_value): - """ - Add linear projection to queries, keys, and values. - """ - q = layers.fc( - input=queries, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr(name=name + '_query_fc.w_0', initializer=param_initializer), - bias_attr=name + '_query_fc.b_0') - k = layers.fc( - input=keys, - size=d_key * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr(name=name + '_key_fc.w_0', initializer=param_initializer), - bias_attr=name + '_key_fc.b_0') - v = layers.fc( - input=values, - size=d_value * n_head, - num_flatten_dims=2, - param_attr=fluid.ParamAttr(name=name + '_value_fc.w_0', initializer=param_initializer), - bias_attr=name + '_value_fc.b_0') - return q, k, v - - def __split_heads(x, n_head): - """ - Reshape the last dimension of inpunt tensor x so that it becomes two - dimensions and then transpose. Specifically, input a tensor with shape - [bs, max_sequence_length, n_head * hidden_dim] then output a tensor - with shape [bs, n_head, max_sequence_length, hidden_dim]. 
- """ - hidden_size = x.shape[-1] - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - reshaped = layers.reshape(x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True) - - # permuate the dimensions into: - # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) - - def __combine_heads(x): - """ - Transpose and then reshape the last two dimensions of inpunt tensor x - so that it becomes one dimension, which is reverse to __split_heads. - """ - if len(x.shape) == 3: return x - if len(x.shape) != 4: - raise ValueError("Input(x) should be a 4-D Tensor.") - - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) - # The value 0 in shape attr means copying the corresponding dimension - # size of the input as the output dimension size. - return layers.reshape(x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], inplace=True) - - def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate): - """ - Scaled Dot-Product Attention - """ - scaled_q = layers.scale(x=q, scale=d_key**-0.5) - product = layers.matmul(x=scaled_q, y=k, transpose_y=True) - if attn_bias: - product += attn_bias - weights = layers.softmax(product) - if dropout_rate: - weights = layers.dropout( - weights, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) - out = layers.matmul(weights, v) - return out - - q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) - - if cache is not None: # use cache and concat time steps - # Since the inplace reshape in __split_heads changes the shape of k and - # v, which is the cache input for next time step, reshape the cache - # input from the previous time step first. - k = cache["k"] = layers.concat([layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1) - v = cache["v"] = layers.concat([layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1) - - q = __split_heads(q, n_head) - k = __split_heads(k, n_head) - v = __split_heads(v, n_head) - - ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate) - - out = __combine_heads(ctx_multiheads) - - # Project back to the model size. - proj_out = layers.fc( - input=out, - size=d_model, - num_flatten_dims=2, - param_attr=fluid.ParamAttr(name=name + '_output_fc.w_0', initializer=param_initializer), - bias_attr=name + '_output_fc.b_0') - return proj_out - - -def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate, hidden_act, param_initializer=None, name='ffn'): - """ - Position-wise Feed-Forward Networks. - This module consists of two linear transformations with a ReLU activation - in between, which is applied to each position separately and identically. 
- """ - if hidden_act == 'gelu' or hidden_act == 'gelu.precise': - _hidden_act = 'gelu' - elif hidden_act == 'gelu.approximate': - _hidden_act = None - else: - _hidden_act = hidden_act - hidden = layers.fc( - input=x, - size=d_inner_hid, - num_flatten_dims=2, - act=_hidden_act, - param_attr=fluid.ParamAttr(name=name + '_fc_0.w_0', initializer=param_initializer), - bias_attr=name + '_fc_0.b_0') - if hidden_act == 'gelu.approximate': - hidden = gelu(hidden) - - if dropout_rate: - hidden = layers.dropout( - hidden, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) - out = layers.fc( - input=hidden, - size=d_hid, - num_flatten_dims=2, - param_attr=fluid.ParamAttr(name=name + '_fc_1.w_0', initializer=param_initializer), - bias_attr=name + '_fc_1.b_0') - return out - - -def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0., epsilon=1e-12, name=''): - """ - Add residual connection, layer normalization and droput to the out tensor - optionally according to the value of process_cmd. - This will be used before or after multi-head attention and position-wise - feed-forward networks. - """ - for cmd in process_cmd: - if cmd == "a": # add residual connection - out = out + prev_out if prev_out else out - elif cmd == "n": # add layer normalization - out_dtype = out.dtype - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float32") - out = layers.layer_norm( - out, - begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.ParamAttr(name=name + '_layer_norm_scale', initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr(name=name + '_layer_norm_bias', initializer=fluid.initializer.Constant(0.)), - epsilon=epsilon) - if out_dtype == fluid.core.VarDesc.VarType.FP16: - out = layers.cast(x=out, dtype="float16") - elif cmd == "d": # add dropout - if dropout_rate: - out = layers.dropout( - out, dropout_prob=dropout_rate, dropout_implementation="upscale_in_train", is_test=False) - return out - - -pre_process_layer = partial(pre_post_process_layer, None) -post_process_layer = pre_post_process_layer - - -def encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd="n", - postprocess_cmd="da", - param_initializer=None, - name='', - epsilon=1e-12, -): - """The encoder layers that can be stacked to form a deep encoder. - This module consits of a multi-head (self) attention followed by - position-wise feed-forward networks and both the two components companied - with the post_process_layer to add residual connection, layer normalization - and droput. 
- """ - - attn_output = multi_head_attention( - enc_input, - None, - None, - attn_bias, - d_key, - d_value, - d_model, - n_head, - attention_dropout, - param_initializer=param_initializer, - name=name + '_multi_head_att') - - attn_output = post_process_layer( - enc_input, attn_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_att', epsilon=epsilon) - - ffd_output = positionwise_feed_forward( - attn_output, - d_inner_hid, - d_model, - relu_dropout, - hidden_act, - param_initializer=param_initializer, - name=name + '_ffn') - - return post_process_layer( - attn_output, ffd_output, postprocess_cmd, prepostprocess_dropout, name=name + '_post_ffn', - epsilon=epsilon), ffd_output - - -def encoder_inner_share(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - param_initializer=None, - name='', - n_layer_per_block=1): - """ - The encoder_inner_share is composed of n_layer_per_block layers returned by calling - encoder_layer. - """ - _checkpoints = [] - for i in range(n_layer_per_block): - enc_output, cp = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name + '_layer_' + str(i), - epsilon=epsilon, - ) - _checkpoints.append(cp) - enc_input = enc_output - - return enc_output, _checkpoints - - -def encoder_outer_share(enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - param_initializer=None, - name='', - n_layer_per_block=1): - """ - The encoder_outer_share is composed of n_layer_per_block layers returned by calling - encoder_layer. - """ - _checkpoints = [] - for i in range(n_layer_per_block): - enc_output, cp = encoder_layer( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name, - epsilon=epsilon) - _checkpoints.append(cp) - enc_input = enc_output - - return enc_output, _checkpoints - - -def encoder(enc_input, - attn_bias, - n_layer, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - epsilon, - n_layer_per_block, - param_initializer=None, - name='', - param_share=None): - """ - The encoder is composed of a stack of identical layers returned by calling - encoder_layer . 
- """ - checkpoints = [] - # for outer_share it will share same param in one block, - # and for inner_share it will share param across blocks, rather than in one same block - # - # outer-share inner-share - # [1] [1] ----\ 1st block - # [1] [2] ----/ - # [2] [1] ----\ 2nd block - # [2] [2] ----/ - - if param_share == "normal" or param_share == 'outer_share': - #n_layer_per_block=1, n_layer=24 for bert-large - #n_layer_per_block=1, n_layer=12 for bert-base - #n_layer_per_block=12, n_layer=12 for albert-xxlarge - #n_layer_per_block=6, n_layer=12 for albert-xxlarge-outershare - enc_fn = encoder_outer_share - name_fn = lambda i: name + '_layer_' + str(i) - elif param_share == "inner_share": - #n_layer_per_block = 2 - enc_fn = encoder_inner_share - name_fn = lambda i: name - else: - raise ValueError('unsupported param share mode') - - for i in range(n_layer // n_layer_per_block): - enc_output, cp = enc_fn( - enc_input, - attn_bias, - n_head, - d_key, - d_value, - d_model, - d_inner_hid, - prepostprocess_dropout, - attention_dropout, - relu_dropout, - hidden_act, - preprocess_cmd, - postprocess_cmd, - param_initializer=param_initializer, - name=name_fn(i), - n_layer_per_block=n_layer_per_block, - epsilon=epsilon, - ) - checkpoints.extend(cp) - enc_input = enc_output - enc_output = pre_process_layer( - enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder", epsilon=epsilon) - - return enc_output, checkpoints diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py index e30d80fc2..50a0506b6 100644 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py @@ -20,19 +20,22 @@ import ast import os -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub import TransformerModule -from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.reader.tokenization import convert_to_unicode, FullTokenizer -from paddlehub.reader.batching import pad_batch_data import numpy as np +from ernie_skep_sentiment_analysis.model.ernie import ErnieConfig +from paddle.framework import core -from ernie_skep_sentiment_analysis.model.ernie import ErnieModel, ErnieConfig +from paddlehub import TransformerModule +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving +from paddlehub.reader.batching import pad_batch_data +from paddlehub.reader.tokenization import convert_to_unicode +from paddlehub.reader.tokenization import FullTokenizer @moduleinfo( name="ernie_skep_sentiment_analysis", - version="1.0.0", + version="1.0.1", summary= "SKEP: Sentiment Knowledge Enhanced Pre-training for Sentiment Analysis. Ernie_skep_sentiment_analysis module is initialize with enie_1.0_chn_large when pretraining. This module is finetuned on ChnSentiCorp dataset to do sentiment claasification. 
It can do sentiment analysis prediction directly, label as positive or negative.", author="baidu-nlp", @@ -69,7 +72,7 @@ def _set_config(self): model_file_path = os.path.join(self.infer_model_path, 'model') params_file_path = os.path.join(self.infer_model_path, 'params') - config = AnalysisConfig(model_file_path, params_file_path) + config = core.AnalysisConfig(model_file_path, params_file_path) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -84,38 +87,13 @@ def _set_config(self): config.disable_glog_info() - self.predictor = create_paddle_predictor(config) - - def net(self, input_ids, position_ids, segment_ids, input_mask): - """ - create neural network. - Args: - input_ids (tensor): the word ids. - position_ids (tensor): the position ids. - segment_ids (tensor): the segment ids. - input_mask (tensor): the padding mask. - - Returns: - pooled_output (tensor): sentence-level output for classification task. - sequence_output (tensor): token-level output for sequence task. - """ - ernie = ErnieModel( - src_ids=input_ids, - position_ids=position_ids, - sentence_ids=segment_ids, - input_mask=input_mask, - config=self.ernie_config, - use_fp16=False) - - pooled_output = ernie.get_pooled_output() - sequence_output = ernie.get_sequence_output() - return pooled_output, sequence_output + self.predictor = core.create_paddle_predictor(config) def array2tensor(self, arr_data): """ convert numpy array to PaddleTensor """ - tensor_data = PaddleTensor(arr_data) + tensor_data = core.PaddleTensor(arr_data) return tensor_data @serving @@ -212,11 +190,10 @@ def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the %s module." % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -233,8 +210,10 @@ def add_module_config_arg(self): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") def add_module_input_arg(self): """ diff --git a/modules/text/sentiment_analysis/senta_bilstm/README.md b/modules/text/sentiment_analysis/senta_bilstm/README.md index 21a5e54c0..e2fc07977 100644 --- a/modules/text/sentiment_analysis/senta_bilstm/README.md +++ b/modules/text/sentiment_analysis/senta_bilstm/README.md @@ -1,6 +1,6 @@ # senta_bilstm |模型名称|senta_bilstm| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|BiLSTM| |数据集|百度自建数据集| @@ -22,7 +22,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -45,12 +45,12 @@ 或者 - ```shell $ hub run senta_bilstm --input_file test.txt - ``` + ``` - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -60,17 +60,17 @@ senta = hub.Module(name="senta_bilstm") test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - results = senta.sentiment_classify(texts=test_text, + results = senta.sentiment_classify(texts=test_text, use_gpu=False, batch_size=1) - + for result in results: print(result['text']) print(result['sentiment_label']) print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) - + # 这家餐厅很好吃 1 positive 0.9407 0.0593 # 这部电影真的很差劲 0 negative 0.02 0.98 ``` @@ -80,7 +80,7 @@ - ```python def sentiment_classify(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - senta_bilstm预测接口,预测输入句子的情感分类(二分类,积极/消极) - **参数** @@ -170,7 +170,7 @@ * 1.0.0 初始发布 - + * 1.0.1 词汇表升级 @@ -182,6 +182,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install senta_bilstm==1.2.0 + $ hub install senta_bilstm==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_bilstm/README_en.md b/modules/text/sentiment_analysis/senta_bilstm/README_en.md index ae7ca125a..240afd0e7 100644 --- a/modules/text/sentiment_analysis/senta_bilstm/README_en.md +++ b/modules/text/sentiment_analysis/senta_bilstm/README_en.md @@ -24,7 +24,7 @@ - ### 1、Environmental dependence - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、Installation @@ -47,16 +47,16 @@ or - ```shell $ hub run senta_bilstm --input_file test.txt - ``` + ``` - test.txt stores the text to be predicted, for example: - + > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command line instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - - + + - ### 2、Prediction Code Example - ```python @@ -64,17 +64,17 @@ senta = hub.Module(name="senta_bilstm") test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - results = senta.sentiment_classify(texts=test_text, + results = senta.sentiment_classify(texts=test_text, use_gpu=False, batch_size=1) - + for result in results: print(result['text']) print(result['sentiment_label']) print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) - + # 这家餐厅很好吃 1 positive 0.9407 0.0593 # 这部电影真的很差劲 0 negative 0.02 0.98 ``` @@ -84,7 +84,7 @@ - ```python def 
sentiment_classify(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - senta_bilstm predicting interfaces, predicting sentiment classification of input sentences (dichotomies, positive/negative) - **Parameter** @@ -173,7 +173,7 @@ * 1.0.0 First release - + * 1.0.1 Vocabulary upgrade @@ -182,9 +182,10 @@ Significantly improve predictive performance -* 1.2.0 +* 1.2.1 + + Remove fluid api - Model upgrade, support transfer learning for text classification, text matching and other tasks - ```shell - $ hub install senta_bilstm==1.2.0 + $ hub install senta_bilstm==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_bilstm/module.py b/modules/text/sentiment_analysis/senta_bilstm/module.py index 0ee5ca73e..c1607997b 100644 --- a/modules/text/sentiment_analysis/senta_bilstm/module.py +++ b/modules/text/sentiment_analysis/senta_bilstm/module.py @@ -6,25 +6,26 @@ import json import math import os + import six +from senta_bilstm.processor import load_vocab +from senta_bilstm.processor import postprocess +from senta_bilstm.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_bilstm.net import bilstm_net -from senta_bilstm.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving -@moduleinfo( - name="senta_bilstm", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="senta_bilstm", + version="1.2.1", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SentaBiLSTM(hub.NLPPredictionModule): + def _initialize(self): """ initialize with the necessary elements @@ -47,111 +48,6 @@ def word_seg_module(self): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_bilstm - - Args: - trainable(bool): whether fine-tune the pretrained parameters of senta_bilstm or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_bilstm (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. 
- main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = bilstm_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
- def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -212,27 +108,3 @@ def get_labels(self): """ self.labels = {"positive": 1, "negative": 0} return self.labels - - -if __name__ == "__main__": - senta = SentaBiLSTM() - inputs, outputs, main_program = senta.context(num_slots=3) - print(inputs) - print(outputs) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict, batch_size=3) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_bilstm/net.py b/modules/text/sentiment_analysis/senta_bilstm/net.py deleted file mode 100755 index e7694c4be..000000000 --- a/modules/text/sentiment_analysis/senta_bilstm/net.py +++ /dev/null @@ -1,31 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def bilstm_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): - """ - Bi-Lstm net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # bi-lstm layer - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - rfc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, size=hid_dim * 4, is_reverse=False) - rlstm_h, c = fluid.layers.dynamic_lstm(input=rfc0, size=hid_dim * 4, is_reverse=True) - - # extract last layer - lstm_last = fluid.layers.sequence_last_step(input=lstm_h) - rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h) - lstm_last_tanh = fluid.layers.tanh(lstm_last) - rlstm_last_tanh = fluid.layers.tanh(rlstm_last) - - # concat layer - lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1) - # full connect layer - fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - - return prediction, fc1 diff --git a/modules/text/sentiment_analysis/senta_bow/README.md b/modules/text/sentiment_analysis/senta_bow/README.md index 4812ccdf8..a0f730ffb 100644 --- a/modules/text/sentiment_analysis/senta_bow/README.md +++ b/modules/text/sentiment_analysis/senta_bow/README.md @@ -1,7 +1,7 @@ # senta_bow |模型名称|senta_bow| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|BOW| |数据集|百度自建数据集| @@ -25,7 +25,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 
1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -52,9 +52,9 @@ ``` - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -73,12 +73,12 @@ print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) - + # 这家餐厅很好吃 1 positive 0.9782 0.0218 # 这部电影真的很差劲 0 negative 0.0124 0.9876 ``` - + - ### 3、API - ```python @@ -175,7 +175,7 @@ * 1.0.0 初始发布 - + * 1.0.1 词汇表升级 @@ -187,7 +187,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 - + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install senta_bow==1.2.0 + $ hub install senta_bow==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_bow/module.py b/modules/text/sentiment_analysis/senta_bow/module.py index 04f4d275f..ede463151 100644 --- a/modules/text/sentiment_analysis/senta_bow/module.py +++ b/modules/text/sentiment_analysis/senta_bow/module.py @@ -6,25 +6,26 @@ import json import math import os + import six +from senta_bow.processor import load_vocab +from senta_bow.processor import postprocess +from senta_bow.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import serving, moduleinfo - -from senta_bow.net import bow_net -from senta_bow.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving -@moduleinfo( - name="senta_bow", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="senta_bow", + version="1.2.1", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SentaBow(hub.NLPPredictionModule): + def _initialize(self): """ initialize with the necessary elements @@ -47,111 +48,6 @@ def word_seg_module(self): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_bow - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_bow or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_bow (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. 
- main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = bow_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - is_sparse=True, - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_bow pretrained model. 
- def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -212,28 +108,3 @@ def get_labels(self): """ self.labels = {"positive": 1, "negative": 0} return self.labels - - -if __name__ == "__main__": - senta = SentaBow() - inputs, outputs, main_program = senta.context(num_slots=3) - print(inputs) - print('*' * 20) - print(outputs) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_bow/net.py b/modules/text/sentiment_analysis/senta_bow/net.py deleted file mode 100755 index 173e2fe04..000000000 --- a/modules/text/sentiment_analysis/senta_bow/net.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def bow_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2): - """ - Bow net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # bow layer - bow = fluid.layers.sequence_pool(input=unpad_feature, pool_type='sum') - bow_tanh = fluid.layers.tanh(bow) - # full connect layer - fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh") - fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh") - - # softmax layer - prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax") - - return prediction, fc_2 diff --git a/modules/text/sentiment_analysis/senta_cnn/README.md b/modules/text/sentiment_analysis/senta_cnn/README.md index c099fea79..f024a44e4 100644 --- a/modules/text/sentiment_analysis/senta_cnn/README.md +++ b/modules/text/sentiment_analysis/senta_cnn/README.md @@ -1,6 +1,6 @@ # senta_cnn |模型名称|senta_cnn| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|CNN| |数据集|百度自建数据集| @@ -21,7 +21,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -47,9 +47,9 @@ ``` - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见:[PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -59,17 +59,17 @@ senta = hub.Module(name="senta_cnn") test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - results = senta.sentiment_classify(texts=test_text, + results = senta.sentiment_classify(texts=test_text, use_gpu=False, batch_size=1) - + for 
result in results: print(result['text']) print(result['sentiment_label']) print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) - + # 这家餐厅很好吃 1 positive 0.7902 0.2098 # 这部电影真的很差劲 0 negative 0.0343 0.9657 ``` @@ -79,7 +79,7 @@ - ```python def sentiment_classify(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - senta_cnn预测接口,预测输入句子的情感分类(二分类,积极/消极) - **参数** @@ -93,7 +93,7 @@ - results(list): 情感分类结果 - + - ```python def get_labels() ``` @@ -171,7 +171,7 @@ * 1.0.0 初始发布 - + * 1.0.1 词汇表升级 @@ -183,7 +183,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 - + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install senta_cnn==1.2.0 + $ hub install senta_cnn==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_cnn/module.py b/modules/text/sentiment_analysis/senta_cnn/module.py index 6aa7c61fd..de92229ee 100644 --- a/modules/text/sentiment_analysis/senta_cnn/module.py +++ b/modules/text/sentiment_analysis/senta_cnn/module.py @@ -6,25 +6,26 @@ import json import math import os + import six +from senta_cnn.processor import load_vocab +from senta_cnn.processor import postprocess +from senta_cnn.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_cnn.net import cnn_net -from senta_cnn.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving -@moduleinfo( - name="senta_cnn", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="senta_cnn", + version="1.2.1", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SentaCNN(hub.NLPPredictionModule): + def _initialize(self, user_dict=None): """ initialize with the necessary elements @@ -47,104 +48,6 @@ def word_seg_module(self): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_cnn - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_cnn or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_cnn (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. 
- main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = cnn_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
- def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -205,25 +108,3 @@ def get_labels(self): """ self.labels = {"positive": 1, "negative": 0} return self.labels - - -if __name__ == "__main__": - senta = SentaCNN() - inputs, outputs, program = senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_cnn/net.py b/modules/text/sentiment_analysis/senta_cnn/net.py deleted file mode 100755 index 339471dc7..000000000 --- a/modules/text/sentiment_analysis/senta_cnn/net.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def cnn_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, win_size=3): - """ - Conv net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - # convolution layer - conv_3 = fluid.nets.sequence_conv_pool( - input=unpad_feature, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max") - # full connect layer - fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2) - - # softmax layer - prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") - - return prediction, fc_1 diff --git a/modules/text/sentiment_analysis/senta_gru/README.md b/modules/text/sentiment_analysis/senta_gru/README.md index bbbec4c21..7f0e25a7f 100644 --- a/modules/text/sentiment_analysis/senta_gru/README.md +++ b/modules/text/sentiment_analysis/senta_gru/README.md @@ -1,7 +1,7 @@ # senta_gru |模型名称|senta_gru| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|GRU| |数据集|百度自建数据集| @@ -25,7 +25,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -51,9 +51,9 @@ ``` - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -75,7 +75,7 @@ # 这家餐厅很好吃 1 positive 0.9607 0.0393 # 这部电影真的很差劲 0 negative 0.0187 0.9813 ``` - + - ### 3、API - ```python @@ -94,7 +94,7 @@ - results(list): 情感分类结果 - + - ```python def get_labels() ``` @@ -147,7 +147,7 @@ # 待预测数据 text = ["这家餐厅很好吃", "这部电影真的很差劲"] - + # 设置运行配置 # 对应本地预测senta_gru.sentiment_classify(texts=text, 
batch_size=1, use_gpu=True) data = {"texts": text, "batch_size": 1, "use_gpu":True} @@ -157,7 +157,7 @@ url = "http://HOST_IP:8866/predict/senta_gru" headers = {"Content-Type": "application/json"} r = requests.post(url=url, headers=headers, data=json.dumps(data)) - + # 打印预测结果 print(json.dumps(r.json(), indent=4, ensure_ascii=False)) ``` @@ -181,7 +181,11 @@ * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 - + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install senta_gru==1.2.0 + $ hub install senta_gru==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_gru/module.py b/modules/text/sentiment_analysis/senta_gru/module.py index 7d578947f..7dc3ec47f 100644 --- a/modules/text/sentiment_analysis/senta_gru/module.py +++ b/modules/text/sentiment_analysis/senta_gru/module.py @@ -6,25 +6,26 @@ import json import math import os + import six +from senta_gru.processor import load_vocab +from senta_gru.processor import postprocess +from senta_gru.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_gru.net import gru_net -from senta_gru.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving -@moduleinfo( - name="senta_gru", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="senta_gru", + version="1.2.1", + summary="Baidu's open-source Sentiment Classification System.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SentaGRU(hub.NLPPredictionModule): + def _initialize(self, user_dict=None): """ initialize with the necessary elements @@ -47,104 +48,6 @@ def word_seg_module(self): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_data=1): - """ - Get the input ,output and program of the pretrained senta_gru - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_gru or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_data(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_gru (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_data >= 1 and num_data <= 3, "num_data(%d) must be 1, 2, or 3" % num_data - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. 
- w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = gru_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_data > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_data > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. - def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -205,25 +108,3 @@ def get_labels(self): """ self.labels = {"positive": 1, "negative": 0} return self.labels - - -if __name__ == "__main__": - senta = SentaGRU() - inputs, outputs, main_program = senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - results = senta.sentiment_classify(texts=test_text) - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_gru/net.py b/modules/text/sentiment_analysis/senta_gru/net.py deleted file mode 100755 index a32f64e87..000000000 --- a/modules/text/sentiment_analysis/senta_gru/net.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding:utf-8 -*- -import 
paddle.fluid as fluid - - -def gru_net(emb, seq_len, emb_dim=128, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): - """ - gru net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 3) - - # GRU layer - gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False) - gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max') - gru_max_tanh = fluid.layers.tanh(gru_max) - - # full connect layer - fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - return prediction, fc1 diff --git a/modules/text/sentiment_analysis/senta_lstm/README.md b/modules/text/sentiment_analysis/senta_lstm/README.md index 6c7a83453..78a767012 100644 --- a/modules/text/sentiment_analysis/senta_lstm/README.md +++ b/modules/text/sentiment_analysis/senta_lstm/README.md @@ -1,6 +1,6 @@ # senta_lstm |模型名称|senta_lstm| -| :--- | :---: | +| :--- | :---: | |类别|文本-情感分析| |网络|LSTM| |数据集|百度自建数据集| @@ -22,7 +22,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.8.0 - + - paddlehub >= 1.8.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -48,9 +48,9 @@ ``` - test.txt 存放待预测文本, 如: > 这家餐厅很好吃 - + > 这部电影真的很差劲 - + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 @@ -60,17 +60,17 @@ senta = hub.Module(name="senta_lstm") test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - results = senta.sentiment_classify(texts=test_text, + results = senta.sentiment_classify(texts=test_text, use_gpu=False, batch_size=1) - + for result in results: print(result['text']) print(result['sentiment_label']) print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) - + # 这家餐厅很好吃 1 positive 0.9285 0.0715 # 这部电影真的很差劲 0 negative 0.0187 0.9813 ``` @@ -80,7 +80,7 @@ - ```python sentiment_classify(texts=[], data={}, use_gpu=False, batch_size=1) ``` - + - senta_lstm预测接口,预测输入句子的情感分类(二分类,积极/消极) - **参数** @@ -107,7 +107,7 @@ - ```python get_vocab_path() ``` - + - 获取预训练时使用的词汇表 - **返回** @@ -171,15 +171,19 @@ * 1.0.1 词汇表升级 - + * 1.1.0 大幅提升预测性能 - + * 1.2.0 模型升级,支持用于文本分类,文本匹配等各种任务迁移学习 - + +* 1.2.1 + + 移除 fluid api + - ```shell - $ hub install senta_lstm==1.2.0 + $ hub install senta_lstm==1.2.1 ``` diff --git a/modules/text/sentiment_analysis/senta_lstm/module.py b/modules/text/sentiment_analysis/senta_lstm/module.py index cc38f0b52..16b5b0b8f 100644 --- a/modules/text/sentiment_analysis/senta_lstm/module.py +++ b/modules/text/sentiment_analysis/senta_lstm/module.py @@ -6,25 +6,26 @@ import json import math import os + import six +from senta_lstm.processor import load_vocab +from senta_lstm.processor import postprocess +from senta_lstm.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo, serving - -from senta_lstm.net import lstm_net -from senta_lstm.processor import load_vocab, preprocess, postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving -@moduleinfo( - name="senta_lstm", - version="1.2.0", - summary="Baidu's open-source Sentiment Classification System.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="senta_lstm", + version="1.2.1", + summary="Baidu's open-source Sentiment Classification System.", + 
author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class SentaLSTM(hub.NLPPredictionModule): + def _initialize(self, user_dict=None): """ initialize with the necessary elements @@ -47,104 +48,6 @@ def word_seg_module(self): self._word_seg_module = hub.Module(name="lac") return self._word_seg_module - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained senta_lstm - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of senta_lstm or not. - max_seq_len (int): It will limit the total sequence returned so that it has a maximum length. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for text classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of senta_lstm (words) - outputs(dict): the output variables of input words (word embeddings and label probilities); - the sentence embedding and sequence length of the first input text. - main_program(Program): the main_program of Senta with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0) - seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0) - seq_len_used = fluid.layers.squeeze(seq_len, axes=[1]) - - # Add embedding layer. - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", initializer=fluid.initializer.TruncatedNormal(scale=0.02), trainable=trainable) - dict_dim = 1256607 - emb_1 = fluid.layers.embedding( - input=text_1, size=[dict_dim, 128], padding_idx=dict_dim - 1, dtype='float32', param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - # Add lstm layer. - pred, fc = lstm_net(emb_1, seq_len_used) - pred_name = pred.name - fc_name = fc.name - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[dict_dim, 128], - padding_idx=dict_dim - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"], - list(main_program.global_block().vars.keys())) - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # Load the senta_lstm pretrained model. 
- def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist) - - inputs = {'seq_len': seq_len} - outputs = { - "class_probs": main_program.global_block().vars[prefix_name + pred_name], - "sentence_feature": main_program.global_block().vars[prefix_name + fc_name] - } - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - return inputs, outputs, main_program - @serving def sentiment_classify(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -205,20 +108,3 @@ def get_labels(self): """ self.labels = {"positive": 1, "negative": 0} return self.labels - - -if __name__ == "__main__": - senta = SentaLSTM() - senta.context(num_slots=3) - # Data to be predicted - test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] - - # execute predict and print the result - input_dict = {"text": test_text} - results = senta.sentiment_classify(data=input_dict) - - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/modules/text/sentiment_analysis/senta_lstm/net.py b/modules/text/sentiment_analysis/senta_lstm/net.py deleted file mode 100755 index 3b5a83870..000000000 --- a/modules/text/sentiment_analysis/senta_lstm/net.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def lstm_net(emb, seq_len, hid_dim=128, hid_dim2=96, class_dim=2, emb_lr=30.0): - """ - Lstm net - """ - # unpad the token_feature - unpad_feature = fluid.layers.sequence_unpad(emb, length=seq_len) - # Lstm layer - fc0 = fluid.layers.fc(input=unpad_feature, size=hid_dim * 4) - lstm_h, c = fluid.layers.dynamic_lstm(input=fc0, size=hid_dim * 4, is_reverse=False) - # max pooling layer - lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max') - lstm_max_tanh = fluid.layers.tanh(lstm_max) - # full connect layer - fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh') - # softmax layer - prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax') - - return prediction, fc1 diff --git a/modules/text/text_review/porn_detection_lstm/README.md b/modules/text/text_review/porn_detection_lstm/README.md index c2bbd1ad8..4af0d576c 100644 --- a/modules/text/text_review/porn_detection_lstm/README.md +++ b/modules/text/text_review/porn_detection_lstm/README.md @@ -21,7 +21,7 @@ - ### 1、环境依赖 - paddlepaddle >= 1.6.2 - + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -39,44 +39,44 @@ - ```shell $ hub run porn_detection_lstm --input_text "黄片下载" ``` - + - 或者 - ```shell $ hub run porn_detection_lstm --input_file test.txt ``` - + - 其中test.txt存放待审查文本,每行仅放置一段待审核文本 - + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) - ### 2、预测代码示例 - ```python import paddlehub as hub - + porn_detection_lstm = hub.Module(name="porn_detection_lstm") - + test_text = ["黄片下载", "打击黄牛党"] - + results = porn_detection_lstm.detection(texts=test_text, use_gpu=True, batch_size=1) - + for index, text in enumerate(test_text): results[index]["text"] = text for index, result in enumerate(results): print(results[index]) - + # 输出结果如下: # {'text': 
'黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9879, 'not_porn_probs': 0.0121} # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} ``` - + - ### 3、API - ```python def detection(texts=[], data={}, use_gpu=False, batch_size=1): ``` - + - porn_detection_lstm预测接口,鉴定输入句子是否为黄文 - **参数** @@ -84,36 +84,23 @@ - data(dict): 预测数据,key必须为text,value是带预测数据。如果使用data参数,则不用传入texts参数,二选一即可。建议使用texts参数,data参数后续会废弃。 - use_gpu(bool): 是否使用GPU预测 - batch_size(int): 批处理大小 - + - **返回** - results(list): 鉴定结果 - - - ```python - def context(trainable=False): - ``` - - - 获取porn_detection_lstm的预训练program以及program的输入输出变量 - - **参数** - - trainable(bool): trainable=True表示program中的参数在Fine-tune时需要微调,否则保持不变。 - - **返回** - - - inputs(dict): program的输入变量 - - outputs(dict): program的输出变量 - - main_program(Program): 带有预训练参数的program - + - ```python def get_labels(): ``` - 获取porn_detection_lstm的可识别的类别及其编号 - + - **返回** - labels(dict): porn_detection_lstm的类别及其对应编号(二分类,是/不是) - + - ```python def get_vocab_path(): ``` - 获取预训练时使用的词汇表 - + - **返回** - vocab_path(str): 词汇表路径 @@ -139,20 +126,20 @@ - ```python import requests import json - + # 待预测数据 text = ["黄片下载", "打击黄牛党"] - + # 设置运行配置 # 对应本地预测porn_detection_lstm.detection(texts=text, batch_size=1, use_gpu=True) data = {"texts": text, "batch_size": 1, "use_gpu":True} - + # 指定预测方法为porn_detection_lstm并发送post请求,content-type类型应指定json方式 # HOST_IP为服务器IP url = "http://HOST_IP:8866/predict/porn_detection_lstm" headers = {"Content-Type": "application/json"} r = requests.post(url=url, headers=headers, data=json.dumps(data)) - + # 打印预测结果 print(json.dumps(r.json(), indent=4, ensure_ascii=False)) ``` @@ -169,6 +156,10 @@ 大幅提升预测性能,同时简化接口使用 +* 1.1.1 + + 移除 fluid api + - ```shell - $ hub install porn_detection_lstm==1.1.0 + $ hub install porn_detection_lstm==1.1.1 ``` diff --git a/modules/text/text_review/porn_detection_lstm/module.py b/modules/text/text_review/porn_detection_lstm/module.py index e1b7778a5..95aba3d87 100644 --- a/modules/text/text_review/porn_detection_lstm/module.py +++ b/modules/text/text_review/porn_detection_lstm/module.py @@ -6,25 +6,28 @@ import json import math import os + +import paddle import six +from porn_detection_lstm.processor import load_vocab +from porn_detection_lstm.processor import postprocess +from porn_detection_lstm.processor import preprocess -import paddle.fluid as fluid import paddlehub as hub from paddlehub.common.paddle_helper import get_variable_info -from paddlehub.module.module import moduleinfo, serving +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import serving from paddlehub.reader import tokenization -from porn_detection_lstm.processor import load_vocab, preprocess, postprocess - -@moduleinfo( - name="porn_detection_lstm", - version="1.1.0", - summary="Baidu's open-source Porn Detection Model.", - author="baidu-nlp", - author_email="", - type="nlp/sentiment_analysis") +@moduleinfo(name="porn_detection_lstm", + version="1.1.1", + summary="Baidu's open-source Porn Detection Model.", + author="baidu-nlp", + author_email="", + type="nlp/sentiment_analysis") class PornDetectionLSTM(hub.NLPPredictionModule): + def _initialize(self): """ initialize with the necessary elements @@ -42,41 +45,6 @@ def _initialize(self): self._set_config() - def context(self, trainable=False): - """ - Get the input ,output and program of the pretrained porn_detection_lstm - Args: - trainable(bool): whether fine-tune the pretrained parameters of 
porn_detection_lstm or not - Returns: - inputs(dict): the input variables of porn_detection_lstm (words) - outputs(dict): the output variables of porn_detection_lstm (the sentiment prediction results) - main_program(Program): the main_program of lac with pretrained prameters - """ - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_target_names, fetch_targets = fluid.io.load_inference_model( - dirname=self.pretrained_model_path, executor=exe) - - with open(self.param_file, 'r') as file: - params_list = file.readlines() - for param in params_list: - param = param.strip() - var = program.global_block().var(param) - var_info = get_variable_info(var) - program.global_block().create_parameter( - shape=var_info['shape'], dtype=var_info['dtype'], name=var_info['name']) - - for param in program.global_block().iter_parameters(): - param.trainable = trainable - - for name, var in program.global_block().vars.items(): - if name == feed_target_names[0]: - inputs = {"words": var} - # output of sencond layer from the end prediction layer (fc-softmax) - if name == "@HUB_porn_detection_lstm@layer_norm_0.tmp_2": - outputs = {"class_probs": fetch_targets[0], "sentence_feature": var} - return inputs, outputs, program - @serving def detection(self, texts=[], data={}, use_gpu=False, batch_size=1): """ @@ -134,27 +102,3 @@ def get_labels(self): """ self.labels = {"porn": 1, "not_porn": 0} return self.labels - - -if __name__ == "__main__": - porn_detection_lstm = PornDetectionLSTM() - porn_detection_lstm.context() - test_text = ["黄片下载", "打击黄牛党"] - - results = porn_detection_lstm.detection(texts=test_text) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) - input_dict = {"text": test_text} - results = porn_detection_lstm.detection(data=input_dict) - for index, text in enumerate(test_text): - results[index]["text"] = text - for index, result in enumerate(results): - if six.PY2: - print(json.dumps(results[index], encoding="utf8", ensure_ascii=False)) - else: - print(results[index]) diff --git a/paddlehub/compat/module/module_v1.py b/paddlehub/compat/module/module_v1.py index 2e9b72b92..848eabf28 100644 --- a/paddlehub/compat/module/module_v1.py +++ b/paddlehub/compat/module/module_v1.py @@ -12,10 +12,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import functools import os -from typing import Tuple, List +from typing import List +from typing import Tuple import paddle import paddle2onnx @@ -23,7 +23,8 @@ from paddlehub.compat import paddle_utils from paddlehub.compat.module import module_v1_utils -from paddlehub.utils import utils, log +from paddlehub.utils import log +from paddlehub.utils import utils class ModuleV1(object): @@ -85,16 +86,15 @@ def _load_parameters(self): # Since the pre-trained model saved by the old version of Paddle cannot restore the corresponding # parameters, we need to restore them manually. 
- global_block.create_parameter( - name=name, - shape=var.shape, - dtype=var.dtype, - type=var.type, - lod_level=var.lod_level, - error_clip=var.error_clip, - stop_gradient=var.stop_gradient, - is_data=var.is_data, - **attrs) + global_block.create_parameter(name=name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level, + error_clip=var.error_clip, + stop_gradient=var.stop_gradient, + is_data=var.is_data, + **attrs) log.logger.info('{} pretrained paramaters loaded by PaddleHub'.format(num_param_loaded)) @@ -112,7 +112,7 @@ def _generate_func(self): def _load_model(self): model_path = os.path.join(self.directory, 'model') exe = paddle.static.Executor(paddle.CPUPlace()) - self.program, _, _ = paddle.fluid.io.load_inference_model(model_path, executor=exe) + self.program, _, _ = paddle.static.load_inference_model(model_path, executor=exe) # Clear the callstack since it may leak the privacy of the creator. for block in self.program.blocks: @@ -122,7 +122,10 @@ def _load_model(self): op._set_attr('op_callstack', ['']) @paddle_utils.run_in_static_mode - def context(self, signature: str = None, for_test: bool = False, trainable: bool = True, + def context(self, + signature: str = None, + for_test: bool = False, + trainable: bool = True, max_seq_len: int = 128) -> Tuple[dict, dict, paddle.static.Program]: '''Get module context information, including graph structure and graph input and output variables.''' program = self.program.clone(for_test=for_test) @@ -171,6 +174,7 @@ def __call__(self, sign_name: str, data: dict, use_gpu: bool = False, batch_size '''Call the specified signature function for prediction.''' def _get_reader_and_feeder(data_format, data, place): + def _reader(process_data): for item in zip(*process_data): yield item @@ -284,14 +288,13 @@ def save_inference_model(self, exe = paddle.static.Executor(place) feed_dict, fetch_dict, program = self.context(for_test=True, trainable=False) - paddle.fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=[var.name for var in list(feed_dict.values())], - target_vars=list(fetch_dict.values()), - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=[var.name for var in list(feed_dict.values())], + target_vars=list(fetch_dict.values()), + model_filename=model_filename, + params_filename=params_filename) log.logger.info('Paddle Inference model saved in {}.'.format(dirname)) @@ -315,13 +318,12 @@ def export_onnx_model(self, dirname: str, **kwargs): outputs = [program.global_block().vars[key] for key in outputs] save_file = os.path.join(dirname, '{}.onnx'.format(self.name)) - paddle2onnx.program2onnx( - program=program, - scope=paddle.static.global_scope(), - feed_var_names=inputs, - target_vars=outputs, - save_file=save_file, - **kwargs) + paddle2onnx.program2onnx(program=program, + scope=paddle.static.global_scope(), + feed_var_names=inputs, + target_vars=outputs, + save_file=save_file, + **kwargs) def sub_modules(self, recursive: bool = True): ''' diff --git a/paddlehub/compat/module/nlp_module.py b/paddlehub/compat/module/nlp_module.py index d7209774f..d8b992dd4 100644 --- a/paddlehub/compat/module/nlp_module.py +++ b/paddlehub/compat/module/nlp_module.py @@ -12,32 +12,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. - import argparse import ast import os import re -import six -from typing import Any, List, Text, Tuple +from typing import Any +from typing import List +from typing import Text +from typing import Tuple -import paddle import numpy as np +import paddle +import six +from paddle.framework import core from paddlehub.compat import paddle_utils -from paddlehub.compat.task.transformer_emb_task import TransformerEmbeddingTask from paddlehub.compat.task.config import RunConfig from paddlehub.compat.task.reader import ClassifyReader -from paddlehub.module.module import runnable, RunModule +from paddlehub.compat.task.transformer_emb_task import TransformerEmbeddingTask +from paddlehub.module.module import RunModule +from paddlehub.module.module import runnable from paddlehub.utils.parser import txt_parser from paddlehub.utils.utils import sys_stdin_encoding class DataFormatError(Exception): + def __init__(self, *args): self.args = args class NLPBaseModule(RunModule): + def get_vocab_path(self): ''' Get the path to the vocabulary whih was used to pretrain @@ -48,12 +54,13 @@ def get_vocab_path(self): class NLPPredictionModule(NLPBaseModule): + def _set_config(self): '''predictor config setting''' - cpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path) + cpu_config = core.AnalysisConfig(self.pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = paddle.fluid.core.create_paddle_predictor(cpu_config) + self.cpu_predictor = core.create_paddle_predictor(cpu_config) try: _places = os.environ['CUDA_VISIBLE_DEVICES'] @@ -62,10 +69,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = paddle.fluid.core.AnalysisConfig(self.pretrained_model_path) + gpu_config = core.AnalysisConfig(self.pretrained_model_path) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = paddle.fluid.core.create_paddle_predictor(gpu_config) + self.gpu_predictor = core.create_paddle_predictor(gpu_config) def texts2tensor(self, texts: List[dict]) -> paddle.Tensor: ''' @@ -81,7 +88,7 @@ def texts2tensor(self, texts: List[dict]) -> paddle.Tensor: for i, text in enumerate(texts): data += text['processed'] lod.append(len(text['processed']) + lod[i]) - tensor = paddle.fluid.core.PaddleTensor(np.array(data).astype('int64')) + tensor = core.PaddleTensor(np.array(data).astype('int64')) tensor.name = 'words' tensor.lod = [lod] tensor.shape = [lod[-1], 1] @@ -108,11 +115,10 @@ def to_unicode(self, texts: str) -> Text: @runnable def run_cmd(self, argvs: List[Any]): '''Run as a command''' - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title='Input options', description='Input data. 
Required') self.arg_config_group = self.parser.add_argument_group( @@ -135,8 +141,10 @@ def run_cmd(self, argvs: List[Any]): def add_module_config_arg(self): '''Add the command config options''' - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help='whether use GPU for prediction') + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help='whether use GPU for prediction') self.arg_config_group.add_argument('--batch_size', type=int, default=1, help='batch size for prediction') @@ -172,8 +180,11 @@ def __init__(self, **kwargs): if not directory: return - super(TransformerModule, self).__init__( - name=name, directory=directory, module_dir=module_dir, version=version, **kwargs) + super(TransformerModule, self).__init__(name=name, + directory=directory, + module_dir=module_dir, + version=version, + **kwargs) self.max_seq_len = max_seq_len @@ -186,21 +197,20 @@ def existed_params(var): return False return os.path.exists(os.path.join(pretraining_params_path, var.name)) - paddle.static.load( - executor=exe, - model_path=pretraining_params_path, - program=main_program, - var_list=main_program.all_parameters()) + paddle.static.load(executor=exe, + model_path=pretraining_params_path, + program=main_program, + var_list=main_program.all_parameters()) def param_prefix(self) -> str: return '@HUB_%s@' % self.name @paddle_utils.run_in_static_mode def context( - self, - max_seq_len: int = None, - trainable: bool = True, - num_slots: int = 1, + self, + max_seq_len: int = None, + trainable: bool = True, + num_slots: int = 1, ) -> Tuple[dict, dict, paddle.static.Program]: ''' get inputs, outputs and program from pre-trained module @@ -225,42 +235,64 @@ def context( module_program = paddle.static.Program() startup_program = paddle.static.Program() with paddle.static.program_guard(module_program, startup_program): - with paddle.fluid.unique_name.guard(): + with paddle.utils.unique_name.guar.guard(): input_ids = paddle.static.data(name='input_ids', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - position_ids = paddle.static.data( - name='position_ids', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - segment_ids = paddle.static.data( - name='segment_ids', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - input_mask = paddle.static.data( - name='input_mask', shape=[-1, max_seq_len, 1], dtype='float32', lod_level=0) + position_ids = paddle.static.data(name='position_ids', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + segment_ids = paddle.static.data(name='segment_ids', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + input_mask = paddle.static.data(name='input_mask', + shape=[-1, max_seq_len, 1], + dtype='float32', + lod_level=0) pooled_output, sequence_output = self.net(input_ids, position_ids, segment_ids, input_mask) data_list = [(input_ids, position_ids, segment_ids, input_mask)] output_name_list = [(pooled_output.name, sequence_output.name)] if num_slots > 1: - input_ids_2 = paddle.static.data( - name='input_ids_2', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - position_ids_2 = paddle.static.data( - name='position_ids_2', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - segment_ids_2 = paddle.static.data( - name='segment_ids_2', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - input_mask_2 = paddle.static.data( - name='input_mask_2', shape=[-1, max_seq_len, 1], dtype='float32', lod_level=0) + input_ids_2 = 
paddle.static.data(name='input_ids_2', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + position_ids_2 = paddle.static.data(name='position_ids_2', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + segment_ids_2 = paddle.static.data(name='segment_ids_2', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + input_mask_2 = paddle.static.data(name='input_mask_2', + shape=[-1, max_seq_len, 1], + dtype='float32', + lod_level=0) pooled_output_2, sequence_output_2 = self.net(input_ids_2, position_ids_2, segment_ids_2, input_mask_2) data_list.append((input_ids_2, position_ids_2, segment_ids_2, input_mask_2)) output_name_list.append((pooled_output_2.name, sequence_output_2.name)) if num_slots > 2: - input_ids_3 = paddle.static.data( - name='input_ids_3', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - position_ids_3 = paddle.static.data( - name='position_ids_3', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - segment_ids_3 = paddle.static.data( - name='segment_ids_3', shape=[-1, max_seq_len, 1], dtype='int64', lod_level=0) - input_mask_3 = paddle.static.data( - name='input_mask_3', shape=[-1, max_seq_len, 1], dtype='float32', lod_level=0) + input_ids_3 = paddle.static.data(name='input_ids_3', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + position_ids_3 = paddle.static.data(name='position_ids_3', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + segment_ids_3 = paddle.static.data(name='segment_ids_3', + shape=[-1, max_seq_len, 1], + dtype='int64', + lod_level=0) + input_mask_3 = paddle.static.data(name='input_mask_3', + shape=[-1, max_seq_len, 1], + dtype='float32', + lod_level=0) pooled_output_3, sequence_output_3 = self.net(input_ids_3, position_ids_3, segment_ids_3, input_mask_3) data_list.append((input_ids_3, position_ids_3, segment_ids_3, input_mask_3)) @@ -305,10 +337,12 @@ def context( inputs['position_ids_%s' % (index + 1)] = data[1] inputs['segment_ids_%s' % (index + 1)] = data[2] inputs['input_mask_%s' % (index + 1)] = data[3] - outputs['pooled_output_%s' % (index + 1)] = module_program.global_block().vars[ - self.param_prefix() + output_name_list[index][0]] - outputs['sequence_output_%s' % (index + 1)] = module_program.global_block().vars[ - self.param_prefix() + output_name_list[index][1]] + outputs['pooled_output_%s' % + (index + 1)] = module_program.global_block().vars[self.param_prefix() + + output_name_list[index][0]] + outputs['sequence_output_%s' % + (index + 1)] = module_program.global_block().vars[self.param_prefix() + + output_name_list[index][1]] return inputs, outputs, module_program diff --git a/paddlehub/compat/paddle_utils.py b/paddlehub/compat/paddle_utils.py index 77f6f6764..742617fd3 100644 --- a/paddlehub/compat/paddle_utils.py +++ b/paddlehub/compat/paddle_utils.py @@ -12,30 +12,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
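For orientation, this is roughly how downstream compat code consumes the `(inputs, outputs, program)` triple that `TransformerModule.context` above builds. The module name is purely illustrative, and the slot-numbered keys follow the pattern visible in the loop above rather than anything added by this patch:

```python
import paddlehub as hub

# Hypothetical ERNIE/BERT-style module whose compat class derives from TransformerModule.
module = hub.Module(name='ernie')

# Two slots => point-wise text matching; slot 1 keys are unsuffixed, slot 2 keys end in "_2".
inputs, outputs, program = module.context(max_seq_len=128, trainable=False, num_slots=2)

query_ids, title_ids = inputs['input_ids'], inputs['input_ids_2']
query_vec, title_vec = outputs['pooled_output'], outputs['pooled_output_2']
```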
- import contextlib import copy -from typing import Callable, List +from typing import Callable +from typing import List import paddle +from paddle.framework import core from paddlehub.utils.utils import Version dtype_map = { - paddle.fluid.core.VarDesc.VarType.FP32: "float32", - paddle.fluid.core.VarDesc.VarType.FP64: "float64", - paddle.fluid.core.VarDesc.VarType.FP16: "float16", - paddle.fluid.core.VarDesc.VarType.INT32: "int32", - paddle.fluid.core.VarDesc.VarType.INT16: "int16", - paddle.fluid.core.VarDesc.VarType.INT64: "int64", - paddle.fluid.core.VarDesc.VarType.BOOL: "bool", - paddle.fluid.core.VarDesc.VarType.INT16: "int16", - paddle.fluid.core.VarDesc.VarType.UINT8: "uint8", - paddle.fluid.core.VarDesc.VarType.INT8: "int8", + core.VarDesc.VarType.FP32: "float32", + core.VarDesc.VarType.FP64: "float64", + core.VarDesc.VarType.FP16: "float16", + core.VarDesc.VarType.INT32: "int32", + core.VarDesc.VarType.INT16: "int16", + core.VarDesc.VarType.INT64: "int64", + core.VarDesc.VarType.BOOL: "bool", + core.VarDesc.VarType.INT16: "int16", + core.VarDesc.VarType.UINT8: "uint8", + core.VarDesc.VarType.INT8: "int8", } -def convert_dtype_to_string(dtype: str) -> paddle.fluid.core.VarDesc.VarType: +def convert_dtype_to_string(dtype: str) -> core.VarDesc.VarType: if dtype in dtype_map: return dtype_map[dtype] raise TypeError("dtype shoule in %s" % list(dtype_map.keys())) diff --git a/paddlehub/compat/task/base_task.py b/paddlehub/compat/task/base_task.py index 4258f4dec..6efb34f8f 100644 --- a/paddlehub/compat/task/base_task.py +++ b/paddlehub/compat/task/base_task.py @@ -12,22 +12,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import contextlib import inspect import os from functools import partial -from typing import Any, Callable, Generator, Generic, Iterator, List, Union +from typing import Any +from typing import Callable +from typing import Generator +from typing import Generic +from typing import Iterator +from typing import List +from typing import Union -import paddle import numpy as np +import paddle +from paddle.framework import core from visualdl import LogWriter from paddlehub.compat import paddle_utils +from paddlehub.compat.task.checkpoint import load_checkpoint from paddlehub.compat.task.config import RunConfig from paddlehub.compat.task.hook import TaskHooks -from paddlehub.compat.task.task_utils import RunEnv, RunState -from paddlehub.compat.task.checkpoint import load_checkpoint +from paddlehub.compat.task.task_utils import RunEnv +from paddlehub.compat.task.task_utils import RunState from paddlehub.utils.log import logger from paddlehub.utils.utils import generate_tempdir @@ -66,8 +73,8 @@ def __init__(self, else: self._base_main_program = paddle_utils.clone_program(main_program, for_test=False) if startup_program is None: - self._base_startup_program = paddle_utils.clone_program( - paddle.static.default_startup_program(), for_test=False) + self._base_startup_program = paddle_utils.clone_program(paddle.static.default_startup_program(), + for_test=False) else: self._base_startup_program = paddle_utils.clone_program(startup_program, for_test=False) self.is_checkpoint_loaded = False @@ -168,7 +175,7 @@ def _build_env(self): self.env.startup_program = paddle.static.Program() with paddle.static.program_guard(self.env.main_program, self._base_startup_program): - with paddle.fluid.unique_name.guard(self.env.UNG): + with paddle.utils.unique_name.guard(self.env.UNG): self.env.outputs = self._build_net() if self.is_train_phase or self.is_test_phase: self.env.labels = self._add_label() @@ -181,11 +188,12 @@ def _build_env(self): if self.is_train_phase: with paddle.static.program_guard(self.env.main_program, self._base_startup_program): - with paddle.fluid.unique_name.guard(self.env.UNG): + with paddle.utils.unique_name.guard(self.env.UNG): if self._compatible_mode: # This branch is compatible code for usage deprecated in paddlehub v1.8. 
- self._base_data_reader.data_generator( - batch_size=self.config.batch_size, phase='train', shuffle=True) + self._base_data_reader.data_generator(batch_size=self.config.batch_size, + phase='train', + shuffle=True) num_train_examples = self._base_data_reader.num_examples['train'] try: # nlp_reader @@ -300,7 +308,9 @@ def main_program_to_be_run(self) -> Union[paddle.static.Program, paddle.static.C @property def generator(self) -> Generator: + def data_generator(records): + def wrapper(): for record in records: values = [] @@ -311,8 +321,10 @@ def wrapper(): return wrapper if self._compatible_mode: - self.env.generator = self._base_data_reader.data_generator( - batch_size=self.config.batch_size, phase=self.phase, data=self._predict_data, return_list=True) + self.env.generator = self._base_data_reader.data_generator(batch_size=self.config.batch_size, + phase=self.phase, + data=self._predict_data, + return_list=True) else: if self.is_predict_phase: records = self._predict_data @@ -360,7 +372,7 @@ def metrics(self) -> List[str]: return self.env.metrics @property - def unique_name_generator(self) -> paddle.fluid.unique_name.UniqueNameGenerator: + def unique_name_generator(self): return self.env.UNG @property @@ -504,16 +516,16 @@ def _default_eval_end_event(self, run_states: List[RunState]): ''' eval_scores, eval_loss, run_speed = self._calculate_metrics(run_states) if 'train' in self._envs: - self.vdl_writer.add_scalar( - tag='Loss_{}'.format(self.phase), value=eval_loss, step=self._envs['train'].current_step) + self.vdl_writer.add_scalar(tag='Loss_{}'.format(self.phase), + value=eval_loss, + step=self._envs['train'].current_step) log_scores = '' for metric in eval_scores: if 'train' in self._envs: - self.vdl_writer.add_scalar( - tag='{}_{}'.format(metric, self.phase), - value=eval_scores[metric], - step=self._envs['train'].current_step) + self.vdl_writer.add_scalar(tag='{}_{}'.format(metric, self.phase), + value=eval_scores[metric], + step=self._envs['train'].current_step) log_scores += '{}={:.5f} '.format(metric, eval_scores[metric]) logger.eval('[{} dataset evaluation result] loss={:.5f} {}[step/sec: {:.2f}]'.format( @@ -540,12 +552,14 @@ def _default_log_interval_event(self, run_states: List[RunState]): run_states (object): the results in train phase ''' scores, avg_loss, run_speed = self._calculate_metrics(run_states) - self.vdl_writer.add_scalar( - tag='Loss_{}'.format(self.phase), value=avg_loss, step=self._envs['train'].current_step) + self.vdl_writer.add_scalar(tag='Loss_{}'.format(self.phase), + value=avg_loss, + step=self._envs['train'].current_step) log_scores = '' for metric in scores: - self.vdl_writer.add_scalar( - tag='{}_{}'.format(metric, self.phase), value=scores[metric], step=self._envs['train'].current_step) + self.vdl_writer.add_scalar(tag='{}_{}'.format(metric, self.phase), + value=scores[metric], + step=self._envs['train'].current_step) log_scores += '{}={:.5f} '.format(metric, scores[metric]) logger.train('step {} / {}: loss={:.5f} {}[step/sec: {:.2f}]'.format(self.current_step, self.max_train_steps, avg_loss, log_scores, run_speed)) @@ -569,7 +583,7 @@ def _add_label(self): raise NotImplementedError def _add_metrics(self): - # Some metrics like acc, auc can be calculated by fluid.layers + # Some metrics like acc, auc # The others can be calculated in _calculate_metrics function raise NotImplementedError @@ -590,6 +604,7 @@ def load_checkpoint(self): return is_load_successful def load_parameters(self, dirname): + def if_exist(var): path = os.path.join(dirname, var.name) 
return os.path.exists(path) @@ -598,14 +613,13 @@ def if_exist(var): def save_inference_model(self, dirname: str, model_filename: str = None, params_filename: str = None): with self.phase_guard('predict'): - paddle.static.save_inference_model( - dirname=dirname, - executor=self.exe, - main_program=self.main_program, - feeded_var_names=self.feed_list, - target_vars=self.fetch_var_list, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + executor=self.exe, + main_program=self.main_program, + feeded_var_names=self.feed_list, + target_vars=self.fetch_var_list, + model_filename=model_filename, + params_filename=params_filename) def finetune_and_eval(self) -> List[RunState]: return self.finetune(do_eval=True) @@ -673,7 +687,7 @@ def eval(self, phase: str = 'dev', load_best_model: bool = False) -> List[RunSta self._eval_end_event(run_states) return run_states - def _create_predictor(self) -> paddle.fluid.core.PaddlePredictor: + def _create_predictor(self) -> core.PaddlePredictor: ''' create high-performance predictor for predict. Returns: @@ -681,7 +695,7 @@ def _create_predictor(self) -> paddle.fluid.core.PaddlePredictor: ''' with generate_tempdir() as _dir: self.save_inference_model(dirname=_dir) - predictor_config = paddle.fluid.core.AnalysisConfig(_dir) + predictor_config = core.AnalysisConfig(_dir) predictor_config.disable_glog_info() if self.config.use_cuda: @@ -690,7 +704,7 @@ def _create_predictor(self) -> paddle.fluid.core.PaddlePredictor: else: predictor_config.disable_gpu() predictor_config.enable_memory_optim() - return paddle.fluid.core.create_paddle_predictor(predictor_config) + return core.create_paddle_predictor(predictor_config) def _run_with_predictor(self) -> List[RunState]: ''' @@ -723,7 +737,7 @@ def _run_with_predictor(self) -> List[RunState]: tensor_batch = [[] for i in range(len(self.feed_list))] for i in range(len(processed_batch)): processed_batch[i] = np.array(processed_batch[i]).reshape(feed_var_shape[i]).astype(feed_var_type[i]) - tensor_batch[i] = paddle.fluid.core.PaddleTensor(processed_batch[i]) + tensor_batch[i] = core.PaddleTensor(processed_batch[i]) fetch_result = self._predictor.run(tensor_batch) for index, result in enumerate(fetch_result): @@ -737,12 +751,12 @@ def _run_with_predictor(self) -> List[RunState]: return global_run_states def predict( - self, - data: List[Any] = None, - label_list: List[Any] = None, - load_best_model: bool = True, - return_result: bool = True, - accelerate_mode: bool = True, + self, + data: List[Any] = None, + label_list: List[Any] = None, + load_best_model: bool = True, + return_result: bool = True, + accelerate_mode: bool = True, ) -> List[RunState]: ''' make prediction for the input data. 
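Since `_create_predictor` and `_run_with_predictor` above now go through `paddle.framework.core` directly instead of `paddle.fluid.core`, the end-to-end inference flow is roughly the following sketch. The model directory and token ids are made up; only calls that already appear in this file and in `nlp_module.py` are used:

```python
import numpy as np
from paddle.framework import core

# Hypothetical exported inference model directory.
config = core.AnalysisConfig('/path/to/inference_model')
config.disable_glog_info()
config.disable_gpu()  # or: config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
predictor = core.create_paddle_predictor(config)

# A single LoD tensor batch, mirroring NLPPredictionModule.texts2tensor.
ids = np.array([[1], [2], [3]], dtype='int64')
tensor = core.PaddleTensor(ids)
tensor.name = 'words'
tensor.lod = [[0, 3]]
tensor.shape = [3, 1]

# fetch_result is a list of core.PaddleTensor, one entry per fetch target.
fetch_result = predictor.run([tensor])
```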
@@ -802,14 +816,20 @@ def _run(self, do_eval: bool = False) -> List[RunState]: RunState: the running result of specific phase ''' with paddle.static.program_guard(self.main_program, self.startup_program): - data_loader = paddle.io.DataLoader.from_generator( - feed_list=self.feed_var_list, capacity=64, use_double_buffer=True, iterable=True) + data_loader = paddle.io.DataLoader.from_generator(feed_list=self.feed_var_list, + capacity=64, + use_double_buffer=True, + iterable=True) if self.is_predict_phase: - data_reader = data_loader.set_sample_generator( - self.generator, places=self.places, batch_size=self.config.batch_size, drop_last=False) + data_reader = data_loader.set_sample_generator(self.generator, + places=self.places, + batch_size=self.config.batch_size, + drop_last=False) else: - data_reader = data_loader.set_sample_generator( - self.generator, places=self.places, batch_size=self.config.batch_size, drop_last=True) + data_reader = data_loader.set_sample_generator(self.generator, + places=self.places, + batch_size=self.config.batch_size, + drop_last=True) global_run_states = [] period_run_states = [] @@ -822,8 +842,10 @@ def _run(self, do_eval: bool = False) -> List[RunState]: tmp = np.array(batch[0][tmp_name]) num_batch_examples = tmp.shape[0] - fetch_result = self.exe.run( - self.main_program_to_be_run, feed=batch, fetch_list=self.fetch_list, return_numpy=self.return_numpy) + fetch_result = self.exe.run(self.main_program_to_be_run, + feed=batch, + fetch_list=self.fetch_list, + return_numpy=self.return_numpy) if not self.return_numpy: fetch_result = [np.array(x) for x in fetch_result] diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py index c0d5451b8..d494eb393 100644 --- a/paddlehub/module/module.py +++ b/paddlehub/module/module.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import ast import builtins import codecs @@ -20,18 +19,25 @@ import os import re import sys -from typing import Callable, Generic, List, Optional, Union +from typing import Callable +from typing import Generic +from typing import List +from typing import Optional +from typing import Union import paddle import paddle2onnx from easydict import EasyDict -from paddlehub.utils import parser, log, utils from paddlehub.compat import paddle_utils from paddlehub.compat.module.module_v1 import ModuleV1 +from paddlehub.utils import log +from paddlehub.utils import parser +from paddlehub.utils import utils class InvalidHubModule(Exception): + def __init__(self, directory: str): self.directory = directory @@ -194,12 +200,11 @@ def save_inference_model(self, for key, _sub_module in self.sub_modules().items(): try: sub_dirname = os.path.normpath(os.path.join(dirname, key)) - _sub_module.save_inference_model( - sub_dirname, - include_sub_modules=include_sub_modules, - model_filename=model_filename, - params_filename=params_filename, - combined=combined) + _sub_module.save_inference_model(sub_dirname, + include_sub_modules=include_sub_modules, + model_filename=model_filename, + params_filename=params_filename, + combined=combined) except: utils.record_exception('Failed to save sub module {}'.format(_sub_module.name)) @@ -249,21 +254,20 @@ def save_inference_model(self, if os.path.exists(os.path.join(self._pretrained_model_path, '__params__')): _params_filename = '__params__' - program, feeded_var_names, target_vars = paddle.fluid.io.load_inference_model( + program, feeded_var_names, target_vars = paddle.static.load_inference_model( dirname=self._pretrained_model_path, executor=exe, model_filename=_model_filename, params_filename=_params_filename, ) - paddle.fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + paddle.static.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) log.logger.info('Paddle Inference model saved in {}.'.format(dirname)) @@ -333,19 +337,17 @@ def export_onnx_model(self, save_file = os.path.join(dirname, '{}.onnx'.format(self.name)) - program, inputs, outputs = paddle.fluid.io.load_inference_model( - dirname=self._pretrained_model_path, - model_filename=model_filename, - params_filename=params_filename, - executor=exe) + program, inputs, outputs = paddle.static.load_inference_model(dirname=self._pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) - paddle2onnx.program2onnx( - program=program, - scope=paddle.static.global_scope(), - feed_var_names=inputs, - target_vars=outputs, - save_file=save_file, - **kwargs) + paddle2onnx.program2onnx(program=program, + scope=paddle.static.global_scope(), + feed_var_names=inputs, + target_vars=outputs, + save_file=save_file, + **kwargs) class Module(object): @@ -385,14 +387,13 @@ def __new__(cls, from paddlehub.server.server import CacheUpdater # This branch come from hub.Module(name='xxx') or hub.Module(directory='xxx') if name: - module = cls.init_with_name( - name=name, - version=version, - source=source, - update=update, - branch=branch, - ignore_env_mismatch=ignore_env_mismatch, - **kwargs) + module = cls.init_with_name(name=name, + version=version, + 
source=source, + update=update, + branch=branch, + ignore_env_mismatch=ignore_env_mismatch, + **kwargs) CacheUpdater("update_cache", module=name, version=version).start() elif directory: module = cls.init_with_directory(directory=directory, **kwargs) @@ -484,13 +485,12 @@ def init_with_name(cls, manager = LocalModuleManager() user_module_cls = manager.search(name, source=source, branch=branch) if not user_module_cls or not user_module_cls.version.match(version): - user_module_cls = manager.install( - name=name, - version=version, - source=source, - update=update, - branch=branch, - ignore_env_mismatch=ignore_env_mismatch) + user_module_cls = manager.install(name=name, + version=version, + source=source, + update=update, + branch=branch, + ignore_env_mismatch=ignore_env_mismatch) directory = manager._get_normalized_path(user_module_cls.name) From 185ba23d7f27aefe52bad9b1803560c152dabfbf Mon Sep 17 00:00:00 2001 From: buchongyu <18001307871@163.com> Date: Mon, 11 Jul 2022 20:26:12 +0800 Subject: [PATCH 023/117] fix a bug, embedding result is a tensor, unpacking error (#1910) * fix a bug, embedding result is a tensor, unpacking error * fix a bug, embedding result is a tensor, unpacking error. * doc format * fix chinese_electra_small README.md * fix electra model version case Co-authored-by: wuzewu --- .../chinese_electra_base/README.md | 213 +++++++++-------- .../chinese_electra_base/module.py | 5 +- .../chinese_electra_small/README.md | 214 +++++++++-------- .../chinese_electra_small/module.py | 5 +- .../language_model/electra_base/README.md | 211 +++++++++-------- .../language_model/electra_base/module.py | 5 +- .../language_model/electra_large/README.md | 217 ++++++++++-------- .../language_model/electra_large/module.py | 5 +- .../language_model/electra_small/README.md | 213 +++++++++-------- .../language_model/electra_small/module.py | 5 +- paddlehub/module/nlp_module.py | 12 +- 11 files changed, 600 insertions(+), 505 deletions(-) diff --git a/modules/text/language_model/chinese_electra_base/README.md b/modules/text/language_model/chinese_electra_base/README.md index 0d11b4902..c28707a54 100644 --- a/modules/text/language_model/chinese_electra_base/README.md +++ b/modules/text/language_model/chinese_electra_base/README.md @@ -1,6 +1,17 @@ -```shell -$ hub install chinese-electra-base==2.0.1 -``` +# chinese-electra-base +|模型名称|chinese-electra-base| +| :--- | :---: | +|类别|文本-语义模型| +|网络|ELECTRA| +|数据集|中文维基+通用数据| +|是否支持Fine-tuning|是| +|模型大小|390MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍


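
The unpacking error named in this patch's commit message comes from the `task=None` path: ELECTRA backbones return a single `sequence_output` tensor (there is no pooled output), so the old `sequence_output, pooled_output = result` assignment fails. The `module.py` hunks later in this patch simply `return result`, and `paddlehub/module/nlp_module.py` then branches on the shape of that result. The sketch below shows the same branching under those assumptions; note that the patch itself checks `len(output) == 1`, while `paddle.is_tensor` is used here only to keep the illustration explicit.

```python
import paddle


def unpack_backbone_output(output):
    # Sketch of the handling added to paddlehub/module/nlp_module.py in the
    # last hunk of this patch. ELECTRA backbones yield a single tensor,
    # while BERT-style backbones yield a (sequence_output, pooled_output)
    # pair; unconditional unpacking raised the error this patch fixes.
    if paddle.is_tensor(output):
        return output.squeeze(0).numpy().tolist()
    sequence_output, pooled_output = output
    return [pooled_output.squeeze(0).numpy().tolist(),
            sequence_output.squeeze(0).numpy().tolist()]
```
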
@@ -8,71 +19,26 @@ $ hub install chinese-electra-base==2.0.1 更多详情请参考[ELECTRA论文](https://openreview.net/pdf?id=r1xMH1BtvB) -## API -```python -def __init__( - task=None, - load_checkpoint=None, - label_map=None, - num_classes=2, - suffix=False, - **kwargs, -) -``` - -创建Module对象(动态图组网版本)。 +## 二、安装 -**参数** - -* `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `label_map`:预测时的类别映射表。 -* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 -* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - max_seq_len=128, - batch_size=1, - use_gpu=False -) -``` - -**参数** - -* `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 -* `max_seq_len`:模型处理文本的最大长度 -* `batch_size`:模型批处理大小 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 - -**返回** - -* `results`:list类型,不同任务类型的返回结果如下 - * 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] - * 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] - -```python -def get_embedding( - data, - use_gpu=False -) -``` +- ### 1、环境依赖 -用于获取输入文本的句子粒度特征与字粒度特征 + - paddlepaddle >= 2.0.0 -**参数** + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 +- ### 2、安装 -**返回** + - ```shell + $ hub install chinese-electra-base==2.0.2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -96,59 +62,110 @@ for idx, text in enumerate(data): ``` 详情可参考PaddleHub示例: -- [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/text_classification) -- [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/sequence_labeling) +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) -## 服务部署 +- ### 2、API + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` -PaddleHub Serving可以部署一个在线获取预训练词向量。 + - 创建Module对象(动态图组网版本)。 -### Step1: 启动PaddleHub Serving + - **参数** -运行启动命令: + - `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 -```shell -$ hub serving start -m chinese-electra-base -``` + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` -这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -### Step2: 发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] -```python -import requests -import json - -# 指定用于获取embedding的文本[[text_1], [text_2], ... ]} -text = [["今天是个好日子"], ["天气预报说今天要下雨"]] -# 以key的方式指定text传入预测方法的时的参数,此例中为"data" -# 对应本地部署,则为module.get_embedding(data=text) -data = {"data": text} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://127.0.0.1:8866/predict/chinese-electra-base" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} - -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** -## 查看代码 + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -https://github.com/ymcui/Chinese-ELECTRA + - **返回** + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 -## 依赖 -paddlepaddle >= 2.0.0 +## 四、服务部署 -paddlehub >= 2.0.0 +- PaddleHub Serving可以部署一个在线获取预训练词向量。 -## 更新历史 +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m chinese-electra-base + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/chinese-electra-base" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 * 1.0.0 @@ -161,3 +178,7 @@ paddlehub >= 2.0.0 * 2.0.1 增加文本匹配任务`text-matching` + +* 2.0.2 + + 修复词嵌入模型预测的问题 diff --git a/modules/text/language_model/chinese_electra_base/module.py b/modules/text/language_model/chinese_electra_base/module.py index 52a9a9fd2..db66dba10 100644 --- a/modules/text/language_model/chinese_electra_base/module.py +++ b/modules/text/language_model/chinese_electra_base/module.py @@ -28,7 +28,7 @@ @moduleinfo( name="chinese-electra-base", - version="2.0.1", + version="2.0.2", summary= "chinese-electra-base, 12-layer, 768-hidden, 12-heads, 102M parameters. 
The module is executed as paddle.dygraph.", author="ymcui", @@ -163,8 +163,7 @@ def forward(self, return probs, loss, {'acc': acc} return probs else: - sequence_output, pooled_output = result - return sequence_output, pooled_output + return result @staticmethod def get_tokenizer(*args, **kwargs): diff --git a/modules/text/language_model/chinese_electra_small/README.md b/modules/text/language_model/chinese_electra_small/README.md index e4d49d10a..e6ed73dd5 100644 --- a/modules/text/language_model/chinese_electra_small/README.md +++ b/modules/text/language_model/chinese_electra_small/README.md @@ -1,78 +1,42 @@ -```shell -$ hub install chinese-electra-small==2.0.1 -``` +# chinese-electra-small +|模型名称|chinese-electra-small| +| :--- | :---: | +|类别|文本-语义模型| +|网络|ELECTRA| +|数据集|中文维基+通用数据| +|是否支持Fine-tuning|是| +|模型大小|47MB| +|最新更新日期|2022-02-08| +|数据指标|-| + + +## 一、模型基本信息 +- ### 模型介绍


更多详情请参考[ELECTRA论文](https://openreview.net/pdf?id=r1xMH1BtvB) -## API -```python -def __init__( - task=None, - load_checkpoint=None, - label_map=None, - num_classes=2, - suffix=False, - **kwargs, -) -``` - -创建Module对象(动态图组网版本)。 - -**参数** - -* `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `label_map`:预测时的类别映射表。 -* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 -* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - max_seq_len=128, - batch_size=1, - use_gpu=False -) -``` - -**参数** - -* `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 -* `max_seq_len`:模型处理文本的最大长度 -* `batch_size`:模型批处理大小 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 - -**返回** - -* `results`:list类型,不同任务类型的返回结果如下 - * 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] - * 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] - -```python -def get_embedding( - data, - use_gpu=False -) -``` - -用于获取输入文本的句子粒度特征与字粒度特征 +## 二、安装 +- ### 1、环境依赖 -**参数** + - paddlepaddle >= 2.0.0 -* `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -**返回** +- ### 2、安装 -* `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + - ```shell + $ hub install chinese-electra-small==2.0.2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -96,59 +60,109 @@ for idx, text in enumerate(data): ``` 详情可参考PaddleHub示例: -- [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/text_classification) -- [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/sequence_labeling) +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` -## 服务部署 + - 创建Module对象(动态图组网版本)。 -PaddleHub Serving可以部署一个在线获取预训练词向量。 + - **参数** -### Step1: 启动PaddleHub Serving + - `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 -运行启动命令: + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` -```shell -$ hub serving start -m chinese-electra-small -``` + - **参数** -这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, 
sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - **返回** -### Step2: 发送预测请求 + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` -```python -import requests -import json - -# 指定用于获取embedding的文本[[text_1], [text_2], ... ]} -text = [["今天是个好日子"], ["天气预报说今天要下雨"]] -# 以key的方式指定text传入预测方法的时的参数,此例中为"data" -# 对应本地部署,则为module.get_embedding(data=text) -data = {"data": text} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://127.0.0.1:8866/predict/chinese-electra-small" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} - -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 用于获取输入文本的句子粒度特征与字粒度特征 -## 查看代码 + - **参数** -https://github.com/ymcui/Chinese-ELECTRA + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + - **返回** -## 依赖 + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 -paddlepaddle >= 2.0.0 +## 四、服务部署 -paddlehub >= 2.0.0 +- PaddleHub Serving可以部署一个在线获取预训练词向量。 -## 更新历史 +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m chinese-electra-small + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/chinese-electra-small" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 * 1.0.0 @@ -161,3 +175,7 @@ paddlehub >= 2.0.0 * 2.0.1 增加文本匹配任务`text-matching` + +* 2.0.2 + + 修复词嵌入模型预测的问题 diff --git a/modules/text/language_model/chinese_electra_small/module.py b/modules/text/language_model/chinese_electra_small/module.py index 9e71856dd..362f56ee6 100644 --- a/modules/text/language_model/chinese_electra_small/module.py +++ b/modules/text/language_model/chinese_electra_small/module.py @@ -28,7 +28,7 @@ @moduleinfo( name="chinese-electra-small", - version="2.0.1", + version="2.0.2", summary= "chinese-electra-small, 12-layer, 256-hidden, 4-heads, 12M parameters. 
The module is executed as paddle.dygraph.", author="ymcui", @@ -163,8 +163,7 @@ def forward(self, return probs, loss, {'acc': acc} return probs else: - sequence_output, pooled_output = result - return sequence_output, pooled_output + return result @staticmethod def get_tokenizer(*args, **kwargs): diff --git a/modules/text/language_model/electra_base/README.md b/modules/text/language_model/electra_base/README.md index 61b995c1f..036c8654f 100644 --- a/modules/text/language_model/electra_base/README.md +++ b/modules/text/language_model/electra_base/README.md @@ -1,6 +1,17 @@ -```shell -$ hub install electra-base==1.0.1 -``` +# electra-base +|模型名称|electra-base| +| :--- | :---: | +|类别|文本-语义模型| +|网络|ELECTRA| +|数据集|英文维基百科| +|是否支持Fine-tuning|是| +|模型大小|630MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍


@@ -8,71 +19,25 @@ $ hub install electra-base==1.0.1 更多详情请参考[ELECTRA论文](https://openreview.net/pdf?id=r1xMH1BtvB) -## API -```python -def __init__( - task=None, - load_checkpoint=None, - label_map=None, - num_classes=2, - suffix=False, - **kwargs, -) -``` - -创建Module对象(动态图组网版本)。 - -**参数** - -* `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `label_map`:预测时的类别映射表。 -* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 -* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - max_seq_len=128, - batch_size=1, - use_gpu=False -) -``` - -**参数** - -* `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 -* `max_seq_len`:模型处理文本的最大长度 -* `batch_size`:模型批处理大小 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 - -**返回** - -* `results`:list类型,不同任务类型的返回结果如下 - * 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] - * 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] - -```python -def get_embedding( - data, - use_gpu=False -) -``` +## 二、安装 -用于获取输入文本的句子粒度特征与字粒度特征 +- ### 1、环境依赖 -**参数** + - paddlepaddle >= 2.0.0 -* `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -**返回** +- ### 2、安装 -* `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 + - ```shell + $ hub install electra-base==1.0.2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -99,56 +64,108 @@ for idx, text in enumerate(data): - [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/text_classification) - [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/sequence_labeling) -## 服务部署 +- ### 2、API + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` -PaddleHub Serving可以部署一个在线获取预训练词向量。 + - 创建Module对象(动态图组网版本)。 -### Step1: 启动PaddleHub Serving + - **参数** -运行启动命令: + - `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + *- `**kwargs`:用户额外指定的关键字字典类型的参数。 -```shell -$ hub serving start -m electra-base -``` + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` -这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, 
sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -### Step2: 发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` + + - 用于获取输入文本的句子粒度特征与字粒度特征 + + - **参数** + + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + + - **返回** + + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 -```python -import requests -import json - -# 指定用于获取embedding的文本[[text_1], [text_2], ... ]} -text = [["今天是个好日子"], ["天气预报说今天要下雨"]] -# 以key的方式指定text传入预测方法的时的参数,此例中为"data" -# 对应本地部署,则为module.get_embedding(data=text) -data = {"data": text} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://127.0.0.1:8866/predict/electra-base" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} - -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` -## 查看代码 +## 四、服务部署 -https://github.com/google-research/electra +- PaddleHub Serving可以部署一个在线获取预训练词向量。 +- ### 第一步:启动PaddleHub Serving -## 依赖 + - ```shell + $ hub serving start -m electra-base + ``` -paddlepaddle >= 2.0.0 + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 -paddlehub >= 2.0.0 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -## 更新历史 +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/electra-base" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -157,3 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 增加文本匹配任务`text-matching` + +* 1.0.2 + + 修复词嵌入模型预测的问题 diff --git a/modules/text/language_model/electra_base/module.py b/modules/text/language_model/electra_base/module.py index 9f4c473c7..97ea3a9ed 100644 --- a/modules/text/language_model/electra_base/module.py +++ b/modules/text/language_model/electra_base/module.py @@ -28,7 +28,7 @@ @moduleinfo( name="electra-base", - version="1.0.1", + version="1.0.2", summary="electra-base, 12-layer, 768-hidden, 12-heads, 110M parameters. 
The module is executed as paddle.dygraph.", author="paddlepaddle", author_email="", @@ -162,8 +162,7 @@ def forward(self, return probs, loss, {'acc': acc} return probs else: - sequence_output, pooled_output = result - return sequence_output, pooled_output + return result @staticmethod def get_tokenizer(*args, **kwargs): diff --git a/modules/text/language_model/electra_large/README.md b/modules/text/language_model/electra_large/README.md index 0eae56097..05e7e28f3 100644 --- a/modules/text/language_model/electra_large/README.md +++ b/modules/text/language_model/electra_large/README.md @@ -1,6 +1,17 @@ -```shell -$ hub install electra-large==1.0.1 -``` +# electra-large +|模型名称|electra-large| +| :--- | :---: | +|类别|文本-语义模型| +|网络|ELECTRA| +|数据集|英文维基百科| +|是否支持Fine-tuning|是| +|模型大小|1.9GB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍


@@ -8,72 +19,25 @@ $ hub install electra-large==1.0.1 更多详情请参考[ELECTRA论文](https://openreview.net/pdf?id=r1xMH1BtvB) -## API -```python -def __init__( - task=None, - load_checkpoint=None, - label_map=None, - num_classes=2, - suffix=False, - **kwargs, -) -``` - -创建Module对象(动态图组网版本)。 - -**参数** - -* `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `label_map`:预测时的类别映射表。 -* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 -* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - max_seq_len=128, - batch_size=1, - use_gpu=False -) -``` - -**参数** - -* `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 -* `max_seq_len`:模型处理文本的最大长度 -* `batch_size`:模型批处理大小 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 - -**返回** - -* `results`:list类型,不同任务类型的返回结果如下 - * 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] - * 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] - -```python -def get_embedding( - data, - use_gpu=False -) -``` - -用于获取输入文本的句子粒度特征与字粒度特征 - -**参数** +## 二、安装 -* `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 -**返回** + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 +- ### 2、安装 + - ```shell + $ hub install electra-large==1.0.2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -**代码示例** +## 三、模型API预测 +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -96,59 +60,110 @@ for idx, text in enumerate(data): ``` 详情可参考PaddleHub示例: -- [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/text_classification) -- [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/sequence_labeling) +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) + +- ### 2、API + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` -## 服务部署 + - 创建Module对象(动态图组网版本)。 -PaddleHub Serving可以部署一个在线获取预训练词向量。 + - **参数** -### Step1: 启动PaddleHub Serving + - `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 -运行启动命令: + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` -```shell -$ hub serving start -m electra-large -``` + - **参数** -这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], 
\[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - **返回** -### Step2: 发送预测请求 + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` -```python -import requests -import json - -# 指定用于获取embedding的文本[[text_1], [text_2], ... ]} -text = [["今天是个好日子"], ["天气预报说今天要下雨"]] -# 以key的方式指定text传入预测方法的时的参数,此例中为"data" -# 对应本地部署,则为module.get_embedding(data=text) -data = {"data": text} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://127.0.0.1:8866/predict/electra-large" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} - -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 用于获取输入文本的句子粒度特征与字粒度特征 -## 查看代码 + - **参数** -https://github.com/google-research/electra + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + - **返回** -## 依赖 + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 -paddlepaddle >= 2.0.0 -paddlehub >= 2.0.0 +## 四、服务部署 -## 更新历史 +- PaddleHub Serving可以部署一个在线获取预训练词向量。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m electra-large + ``` + + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/electra-large" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 * 1.0.0 @@ -157,3 +172,7 @@ paddlehub >= 2.0.0 * 1.0.1 增加文本匹配任务`text-matching` + +* 1.0.2 + + 修复词嵌入模型预测的问题 diff --git a/modules/text/language_model/electra_large/module.py b/modules/text/language_model/electra_large/module.py index 8360f30f7..16d08b59f 100644 --- a/modules/text/language_model/electra_large/module.py +++ b/modules/text/language_model/electra_large/module.py @@ -28,7 +28,7 @@ @moduleinfo( name="electra-large", - version="1.0.1", + version="1.0.2", summary="electra-large, 24-layer, 1024-hidden, 16-heads, 335M parameters. 
The module is executed as paddle.dygraph.", author="paddlepaddle", author_email="", @@ -162,8 +162,7 @@ def forward(self, return probs, loss, {'acc': acc} return probs else: - sequence_output, pooled_output = result - return sequence_output, pooled_output + return result @staticmethod def get_tokenizer(*args, **kwargs): diff --git a/modules/text/language_model/electra_small/README.md b/modules/text/language_model/electra_small/README.md index bb2adb75f..32ed69058 100644 --- a/modules/text/language_model/electra_small/README.md +++ b/modules/text/language_model/electra_small/README.md @@ -1,78 +1,42 @@ -```shell -$ hub install electra-small==1.0.1 -``` - +# electra-small +|模型名称|electra-small| +| :--- | :---: | +|类别|文本-语义模型| +|网络|ELECTRA| +|数据集|英文维基百科| +|是否支持Fine-tuning|是| +|模型大小|78MB| +|最新更新日期|2022-02-08| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍


更多详情请参考[ELECTRA论文](https://openreview.net/pdf?id=r1xMH1BtvB) -## API -```python -def __init__( - task=None, - load_checkpoint=None, - label_map=None, - num_classes=2, - suffix=False, - **kwargs, -) -``` - -创建Module对象(动态图组网版本)。 +## 二、安装 -**参数** +- ### 1、环境依赖 -* `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `label_map`:预测时的类别映射表。 -* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 -* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 + - paddlepaddle >= 2.0.0 -```python -def predict( - data, - max_seq_len=128, - batch_size=1, - use_gpu=False -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -**参数** +- ### 2、安装 -* `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 -* `max_seq_len`:模型处理文本的最大长度 -* `batch_size`:模型批处理大小 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 + - ```shell + $ hub install electra-small==1.0.2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -**返回** +## 三、模型API预测 -* `results`:list类型,不同任务类型的返回结果如下 - * 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] - * 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] - -```python -def get_embedding( - data, - use_gpu=False -) -``` - -用于获取输入文本的句子粒度特征与字粒度特征 - -**参数** - -* `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 - -**返回** - -* `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 - - -**代码示例** +- ### 1、预测代码示例 ```python import paddlehub as hub @@ -96,60 +60,109 @@ for idx, text in enumerate(data): ``` 详情可参考PaddleHub示例: -- [文本分类](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/text_classification) -- [序列标注](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-beta/demo/sequence_labeling) +- [文本分类](../../../../demo/text_classification) +- [序列标注](../../../../demo/sequence_labeling) -## 服务部署 +- ### 2、API -PaddleHub Serving可以部署一个在线获取预训练词向量。 + - ```python + def __init__( + task=None, + load_checkpoint=None, + label_map=None, + num_classes=2, + suffix=False, + **kwargs, + ) + ``` -### Step1: 启动PaddleHub Serving + - 创建Module对象(动态图组网版本)。 -运行启动命令: + - **参数** -```shell -$ hub serving start -m electra-small -``` + - `task`: 任务名称,可为`seq-cls`(文本分类任务,原来的`sequence_classification`在未来会被弃用)或`token-cls`(序列标注任务)。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `label_map`:预测时的类别映射表。 + - `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 + - `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 -这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + - ```python + def predict( + data, + max_seq_len=128, + batch_size=1, + use_gpu=False + ) + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - **参数** -### Step2: 发送预测请求 + - `data`: 待预测数据,格式为\[\[sample\_a\_text\_a, 
sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。每个样例文本数量(1个或者2个)需和训练时保持一致。 + - `max_seq_len`:模型处理文本的最大长度 + - `batch_size`:模型批处理大小 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **返回** -```python -import requests -import json - -# 指定用于获取embedding的文本[[text_1], [text_2], ... ]} -text = [["今天是个好日子"], ["天气预报说今天要下雨"]] -# 以key的方式指定text传入预测方法的时的参数,此例中为"data" -# 对应本地部署,则为module.get_embedding(data=text) -data = {"data": text} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://127.0.0.1:8866/predict/electra-small" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} - -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - `results`:list类型,不同任务类型的返回结果如下 + - 文本分类:列表里包含每个句子的预测标签,格式为\[label\_1, label\_2, …,\] + - 序列标注:列表里包含每个句子每个token的预测标签,格式为\[\[token\_1, token\_2, …,\], \[token\_1, token\_2, …,\], …,\] + + - ```python + def get_embedding( + data, + use_gpu=False + ) + ``` -## 查看代码 + - 用于获取输入文本的句子粒度特征与字粒度特征 -https://github.com/google-research/electra + - **参数** + - `data`:输入文本列表,格式为\[\[sample\_a\_text\_a, sample\_a\_text\_b\], \[sample\_b\_text\_a, sample\_b\_text\_b\],…,\],其中每个元素都是一个样例,每个样例可以包含text\_a与text\_b。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -## 依赖 + - **返回** -paddlepaddle >= 2.0.0 + - `results`:list类型,格式为\[\[sample\_a\_pooled\_feature, sample\_a\_seq\_feature\], \[sample\_b\_pooled\_feature, sample\_b\_seq\_feature\],…,\],其中每个元素都是对应样例的特征输出,每个样例都有句子粒度特征pooled\_feature与字粒度特征seq\_feature。 -paddlehub >= 2.0.0 +## 四、服务部署 -## 更新历史 +- PaddleHub Serving可以部署一个在线获取预训练词向量。 +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m electra-small + ``` + - 这样就完成了一个获取预训练词向量服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于获取embedding的文本[[text_1], [text_2], ... ]} + text = [["今天是个好日子"], ["天气预报说今天要下雨"]] + # 以key的方式指定text传入预测方法的时的参数,此例中为"data" + # 对应本地部署,则为module.get_embedding(data=text) + data = {"data": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/electra-small" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 * 1.0.0 初始发布,动态图版本模型,支持文本分类`seq-cls`和序列标注`token-cls`任务的fine-tune @@ -157,3 +170,7 @@ paddlehub >= 2.0.0 * 1.0.1 增加文本匹配任务`text-matching` + +* 1.0.2 + + 修复词嵌入模型预测的问题 diff --git a/modules/text/language_model/electra_small/module.py b/modules/text/language_model/electra_small/module.py index b139f0cb1..dfdb953b9 100644 --- a/modules/text/language_model/electra_small/module.py +++ b/modules/text/language_model/electra_small/module.py @@ -28,7 +28,7 @@ @moduleinfo( name="electra-small", - version="1.0.1", + version="1.0.2", summary="electra-small, 12-layer, 256-hidden, 4-heads, 14M parameters. 
The module is executed as paddle.dygraph.", author="paddlepaddle", author_email="", @@ -162,8 +162,7 @@ def forward(self, return probs, loss, {'acc': acc} return probs else: - sequence_output, pooled_output = result - return sequence_output, pooled_output + return result @staticmethod def get_tokenizer(*args, **kwargs): diff --git a/paddlehub/module/nlp_module.py b/paddlehub/module/nlp_module.py index 9e39d3467..379cd866b 100644 --- a/paddlehub/module/nlp_module.py +++ b/paddlehub/module/nlp_module.py @@ -607,10 +607,14 @@ def predict(self, # token labels labels = [[self.label_map[i] for i in token_ids] for token_ids in batch_ids] elif self.task == None: - sequence_output, pooled_output = self(input_ids, segment_ids) - results.append( - [pooled_output.squeeze(0).numpy().tolist(), - sequence_output.squeeze(0).numpy().tolist()]) + output = self(input_ids, segment_ids) + if len(output) == 1: + results.append(output.squeeze(0).numpy().tolist()) + else: + sequence_output, pooled_output = output + results.append( + [pooled_output.squeeze(0).numpy().tolist(), + sequence_output.squeeze(0).numpy().tolist()]) if self.task: # save probs only when return prob if return_prob: From 5e3ca2d53e49e3d0914159accd501c7f6e25b900 Mon Sep 17 00:00:00 2001 From: buchongyu <18001307871@163.com> Date: Wed, 13 Jul 2022 14:15:18 +0800 Subject: [PATCH 024/117] fix deeplabv3p_xception65_humanseg inference client bug (#1914) --- .../deeplabv3p_xception65_humanseg/README.md | 57 ++++++++++--------- .../README_en.md | 57 ++++++++++--------- 2 files changed, 58 insertions(+), 56 deletions(-) diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md index 4b939565d..ae623197e 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md @@ -131,34 +131,35 @@ - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - ```python - import requests - import json - import cv2 - import base64 - import numpy as np - - - def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - - def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - # 发送HTTP请求 - data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} - headers = {"Content-type": "application/json"} - url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" - r = requests.post(url=url, headers=headers, # 保存图片 - mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) - rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) - cv2.imwrite("segment_human_server.png", rgba) - ``` + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread("/PATH/TO/IMAGE") + # 发送HTTP请求 + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" + r = requests.post(url=url, headers=headers, 
data=json.dumps(data))# 保存图片 + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` ## 五、更新历史 diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md index eb6204c76..8e090c7f7 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md @@ -128,34 +128,35 @@ - With a configured server, use the following lines of code to send the prediction request and obtain the result - - ```python - import requests - import json - import cv2 - import base64 - import numpy as np - - - def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - - def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - # Send an HTTP request - data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} - headers = {"Content-type": "application/json"} - url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" - r = requests.post(url=url, headers=headers, - mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) - rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) - cv2.imwrite("segment_human_server.png", rgba) - ``` + - ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + org_im = cv2.imread("/PATH/TO/IMAGE") + # Send an HTTP request + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` ## V. 
Release Note - 1.0.0 From 92563e669c6a4544f7040179d4eee60255fe9520 Mon Sep 17 00:00:00 2001 From: buchongyu <18001307871@163.com> Date: Thu, 14 Jul 2022 10:12:34 +0800 Subject: [PATCH 025/117] fix module/README.md error link address (#1915) --- modules/README.md | 4 ++-- modules/README_ch.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/README.md b/modules/README.md index 3e41ab12a..b1dad2373 100644 --- a/modules/README.md +++ b/modules/README.md @@ -382,8 +382,8 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| |--|--|--|--| -|[transformer_zh-en](text/machine_translation/transformer/transformer_zh-en)|Transformer|CWMT2021|中文译英文| -|[transformer_en-de](text/machine_translation/transformer/transformer_en-de)|Transformer|WMT14 EN-DE|英文译德文| +|[transformer_zh-en](text/machine_translation/transformer/zh-en)|Transformer|CWMT2021|中文译英文| +|[transformer_en-de](text/machine_translation/transformer/en-de)|Transformer|WMT14 EN-DE|英文译德文| - ### Language Model diff --git a/modules/README_ch.md b/modules/README_ch.md index d3389e3c3..087841cbd 100644 --- a/modules/README_ch.md +++ b/modules/README_ch.md @@ -382,8 +382,8 @@ |module|网络|数据集|简介| |--|--|--|--| -|[transformer_zh-en](text/machine_translation/transformer/transformer_zh-en)|Transformer|CWMT2021|中文译英文| -|[transformer_en-de](text/machine_translation/transformer/transformer_en-de)|Transformer|WMT14 EN-DE|英文译德文| +|[transformer_zh-en](text/machine_translation/transformer/zh-en)|Transformer|CWMT2021|中文译英文| +|[transformer_en-de](text/machine_translation/transformer/en-de)|Transformer|WMT14 EN-DE|英文译德文| - ### 语义模型 From 1730279fdd5ff58e4c1d76506ad71a4034312d22 Mon Sep 17 00:00:00 2001 From: buchongyu <18001307871@163.com> Date: Thu, 14 Jul 2022 17:17:09 +0800 Subject: [PATCH 026/117] upload dockerfile of cpu env (#1916) --- docker/Dockerfile | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 docker/Dockerfile diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 000000000..f5d1301e3 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,66 @@ +FROM ubuntu:16.04 + +RUN echo "deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial main restricted \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates main restricted \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial universe \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates universe \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial multiverse \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates multiverse \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-backports main restricted universe multiverse \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security main restricted \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security universe \n\ +deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security multiverse" > /etc/apt/sources.list + +RUN apt-get update && apt-get install -y inetutils-ping wget vim curl cmake git sox libsndfile1 libpng12-dev \ + libpng-dev swig libzip-dev openssl bc libflac* libgdk-pixbuf2.0-dev libpango1.0-dev libcairo2-dev \ + libgtk2.0-dev pkg-config zip unzip zlib1g-dev libreadline-dev libbz2-dev liblapack-dev libjpeg-turbo8-dev \ + sudo lrzsz libsqlite3-dev libx11-dev libsm6 apt-utils libopencv-dev libavcodec-dev 
libavformat-dev \ + libswscale-dev locales liblzma-dev python-lzma m4 libxext-dev strace libibverbs-dev libpcre3 libpcre3-dev \ + build-essential libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev xz-utils \ + libfreetype6-dev libxslt1-dev libxml2-dev libgeos-3.5.0 libgeos-dev && apt-get install -y --allow-downgrades \ + --allow-change-held-packages && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata \ + && /bin/cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && dpkg-reconfigure -f noninteractive tzdata + +RUN echo "set meta-flag on" >> /etc/inputrc && echo "set convert-meta off" >> /etc/inputrc && \ + locale-gen en_US.UTF-8 && /sbin/ldconfig -v && groupadd -g 10001 paddlehub && \ + useradd -m -s /bin/bash -N -u 10001 paddlehub -g paddlehub && chmod g+w /etc/passwd && \ + echo "paddlehub ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers + +ENV LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 LANGUAGE=en_US.UTF-8 TZ=Asia/Shanghai + +# official download site: https://www.python.org/ftp/python/3.7.13/Python-3.7.13.tgz +RUN wget https://cdn.npmmirror.com/binaries/python/3.7.13/Python-3.7.13.tgz && tar xvf Python-3.7.13.tgz && \ + cd Python-3.7.13 && ./configure --prefix=/home/paddlehub/python3.7 && make -j8 && make install && \ + rm -rf ../Python-3.7.13 ../Python-3.7.13.tgz && chown -R paddlehub:paddlehub /home/paddlehub/python3.7 + +RUN cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/gmp/gmp-6.1.0.tar.bz2 && tar xvf gmp-6.1.0.tar.bz2 && \ + cd gmp-6.1.0 && ./configure --prefix=/usr/local && make -j8 && make install && \ + rm -rf ../gmp-6.1.0.tar.bz2 ../gmp-6.1.0 && cd /tmp && \ + wget https://www.mpfr.org/mpfr-3.1.4/mpfr-3.1.4.tar.bz2 && tar xvf mpfr-3.1.4.tar.bz2 && cd mpfr-3.1.4 && \ + ./configure --prefix=/usr/local && make -j8 && make install && rm -rf ../mpfr-3.1.4.tar.bz2 ../mpfr-3.1.4 && \ + cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/mpc/mpc-1.0.3.tar.gz && tar xvf mpc-1.0.3.tar.gz && \ + cd mpc-1.0.3 && ./configure --prefix=/usr/local && make -j8 && make install && \ + rm -rf ../mpc-1.0.3.tar.gz ../mpc-1.0.3 && cd /tmp && \ + wget http://www.mirrorservice.org/sites/sourceware.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 && \ + tar xvf isl-0.18.tar.bz2 && cd isl-0.18 && ./configure --prefix=/usr/local && make -j8 && make install \ + && rm -rf ../isl-0.18.tar.bz2 ../isl-0.18 && cd /tmp && \ + wget http://mirrors.ustc.edu.cn/gnu/gcc/gcc-8.2.0/gcc-8.2.0.tar.gz --no-check-certificate && \ + tar xvf gcc-8.2.0.tar.gz && cd gcc-8.2.0 && unset LIBRARY_PATH && ./configure --prefix=/home/paddlehub/gcc82 \ + --enable-threads=posix --disable-checking --disable-multilib --enable-languages=c,c++ --with-gmp=/usr/local \ + --with-mpfr=/usr/local --with-mpc=/usr/local --with-isl=/usr/local && make -j8 && make install && \ + rm -rf ../gcc-8.2.0.tar.gz ../gcc-8.2.0 && chown -R paddlehub:paddlehub /home/paddlehub/gcc82 + +WORKDIR /home/paddlehub +USER paddlehub +ENV PATH=/home/paddlehub/python3.7/bin:/home/paddlehub/gcc82/bin:${PATH} \ + LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda-11.2/targets/x86_64-linux/lib:${LD_LIBRARY_PATH} + +RUN mkdir -p ~/.pip && echo "[global]" > ~/.pip/pip.conf && \ + echo "index-url=https://mirror.baidu.com/pypi/simple" >> ~/.pip/pip.conf && \ + echo "trusted-host=mirror.baidu.com" >> ~/.pip/pip.conf && \ + pip3 install --upgrade pip && pip3 install paddlepaddle paddlehub shapely pyclipper && \ + sudo cp -f /home/paddlehub/gcc82/lib64/libstdc++.so.6.0.25 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 && \ + rm -rf ~/.cache/pip + +#RUN 
hub install +CMD ['bash'] From 036cdce420c2876f35b13ba249520035c588d1ff Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 27 Jul 2022 07:26:10 +0800 Subject: [PATCH 027/117] fix doc format (#1855) Co-authored-by: wuzewu Co-authored-by: Zeyu Chen --- modules/image/Image_gan/gan/styleganv2_editing/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/image/Image_gan/gan/styleganv2_editing/README.md b/modules/image/Image_gan/gan/styleganv2_editing/README.md index 4728207bf..a8340954f 100644 --- a/modules/image/Image_gan/gan/styleganv2_editing/README.md +++ b/modules/image/Image_gan/gan/styleganv2_editing/README.md @@ -63,13 +63,13 @@ module = hub.Module(name="styleganv2_editing") input_path = ["/PATH/TO/IMAGE"] # Read from a file - module.generate(paths=input_path, direction_name = 'age', direction_offset = 5, output_dir='./editing_result/', use_gpu=True) + module.generate(paths=input_path, direction_name='age', direction_offset=5, output_dir='./editing_result/', use_gpu=True) ``` - ### 3、API - ```python - generate(self, images=None, paths=None, direction_name = 'age', direction_offset = 0.0, output_dir='./editing_result/', use_gpu=False, visualization=True) + generate(self, images=None, paths=None, direction_name='age', direction_offset=0.0, output_dir='./editing_result/', use_gpu=False, visualization=True) ``` - 人脸编辑生成API。 From f4d6e64cdc132ae868699a0ba442f4ab1d304a14 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:19:17 +0800 Subject: [PATCH 028/117] add disco_diffusion_cnclip_vitb16 module --- .../disco_diffusion_cnclip_vitb16/README.md | 114 ++ .../cn_clip/clip/README.md | 3 + .../cn_clip/clip/__init__.py | 4 + .../cn_clip/clip/bert_tokenizer.py | 426 ++++++ .../cn_clip/clip/configuration_bert.py | 85 ++ .../cn_clip/clip/model.py | 247 ++++ .../RoBERTa-wwm-ext-base-chinese.json | 13 + .../RoBERTa-wwm-ext-large-chinese.json | 13 + .../cn_clip/clip/model_configs/ViT-B-16.json | 7 + .../cn_clip/clip/model_configs/ViT-B-32.json | 7 + .../cn_clip/clip/model_configs/ViT-L-14.json | 7 + .../cn_clip/clip/modeling_bert.py | 450 ++++++ .../cn_clip/clip/utils.py | 74 + .../disco_diffusion_cnclip_vitb16/module.py | 435 ++++++ .../requirements.txt | 8 + .../resize_right/README.md | 3 + .../resize_right/__init__.py | 0 .../resize_right/interp_methods.py | 70 + .../resize_right/resize_right.py | 403 ++++++ .../reverse_diffusion/README.md | 2 + .../reverse_diffusion/__init__.py | 156 +++ .../reverse_diffusion/config.py | 77 ++ .../reverse_diffusion/helper.py | 138 ++ .../reverse_diffusion/model/__init__.py | 3 + .../model/gaussian_diffusion.py | 1214 +++++++++++++++++ .../reverse_diffusion/model/losses.py | 86 ++ .../reverse_diffusion/model/make_cutouts.py | 177 +++ .../reverse_diffusion/model/nn.py | 127 ++ .../reverse_diffusion/model/perlin_noises.py | 78 ++ .../reverse_diffusion/model/respace.py | 123 ++ .../reverse_diffusion/model/script_util.py | 201 +++ .../reverse_diffusion/model/sec_diff.py | 135 ++ .../reverse_diffusion/model/transforms.py | 757 ++++++++++ .../reverse_diffusion/model/unet.py | 838 ++++++++++++ .../reverse_diffusion/resources/default.yml | 45 + .../resources/docstrings.yml | 103 ++ .../reverse_diffusion/runner.py | 285 ++++ 37 files changed, 6914 insertions(+) create mode 100644 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md create mode 100644 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/README.md create mode 100755 
modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/bert_tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/configuration_bert.py create mode 100644 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-base-chinese.json create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-large-chinese.json create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-16.json create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-32.json create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-L-14.json create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/modeling_bert.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/requirements.txt create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/interp_methods.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/resize_right.py create mode 100644 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/config.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/helper.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/gaussian_diffusion.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/losses.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/make_cutouts.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/nn.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/perlin_noises.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/respace.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/script_util.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/sec_diff.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/transforms.py create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/unet.py create mode 100755 
modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/default.yml create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/docstrings.yml create mode 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/runner.py diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md new file mode 100644 index 000000000..135600437 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md @@ -0,0 +1,114 @@ +# disco_diffusion_cnclip_vitb16 + +|模型名称|disco_diffusion_cnclip_vitb16| +| :--- | :---: | +|类别|图像-文图生成| +|网络|dd+cnclip ViTB16| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|2.9GB| +|最新更新日期|2022-08-02| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "在宁静的风景中画一幅美丽的建筑画,由Arthur Adams在artstation上所作" + + - 输出图像 +

+
+ - 生成过程
+
+ +### 模型介绍 + +disco_diffusion_cnclip_vitb16 是一个文图生成模型,可以通过输入一段文字来生成符合该句子语义的图像。该模型由两部分组成,一部分是扩散模型,是一种生成模型,可以从噪声输入中重建出原始图像。另一部分是多模态预训练模型(CLIP), 可以将文本和图像表示在同一个特征空间,相近语义的文本和图像在该特征空间里距离会更相近。在该文图生成模型中,扩散模型负责从初始噪声或者指定初始图像中来生成目标图像,CLIP负责引导生成图像的语义和输入的文本的语义尽可能接近,随着扩散模型在CLIP的引导下不断的迭代生成新图像,最终能够生成文本所描述内容的图像。该模块中使用的CLIP模型结构为ViTB16。 + +更多详情请参考论文:[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) 以及 [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install disco_diffusion_cnclip_vitb16 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run disco_diffusion_cnclip_vitb16 --text_prompts "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作。" --output_dir disco_diffusion_cnclip_vitb16_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_cnclip_vitb16") + text_prompts = ["孤舟蓑笠翁,独钓寒江雪。"] + # 生成图像, 默认会在disco_diffusion_cnclip_vitb16_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + da = module.generate_image(text_prompts=text_prompts, artist='齐白石', output_dir='./disco_diffusion_cnclip_vitb16_out/') + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_cnclip_vitb16_out-result.png') + # 展示所有的中间结果 + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_cnclip_vitb16_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作"。 + - style(Optional[str]): 指定绘画的风格,如水墨画、油画、水彩画等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如齐白石、Greg Rutkowsk,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"disco_diffusion_cnclip_vitb16_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install disco_diffusion_cnclip_vitb16 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/README.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/README.md new file mode 100644 index 000000000..61cbe4ac5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/README.md @@ 
-0,0 +1,3 @@ +# Chinese-CLIP (Paddle) +Chinese-CLIP implemented by Paddle. +This module is based on [billjie1/Chinese-CLIP](https://github.com/billjie1/Chinese-CLIP). diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/__init__.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/__init__.py new file mode 100755 index 000000000..2e17bd07f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/__init__.py @@ -0,0 +1,4 @@ +from .bert_tokenizer import FullTokenizer + +_tokenizer = FullTokenizer() +from .utils import tokenize, create_model diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/bert_tokenizer.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/bert_tokenizer.py new file mode 100755 index 000000000..ab4ec678b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/bert_tokenizer.py @@ -0,0 +1,426 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os +import re +import unicodedata +from functools import lru_cache + +import six + + +@lru_cache() +def default_vocab(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "vocab.txt") + + +def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): + """Checks whether the casing config is consistent with the checkpoint name.""" + + # The casing has to be passed in by the user and there is no explicit check + # as to whether it matches the checkpoint. The casing information probably + # should have been stored in the bert_config.json file, but it's not, so + # we have to heuristically detect it to validate. + + if not init_checkpoint: + return + + m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint) + if m is None: + return + + model_name = m.group(1) + + lower_models = [ + "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12", "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12" + ] + + cased_models = ["cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", "multi_cased_L-12_H-768_A-12"] + + is_bad_config = False + if model_name in lower_models and not do_lower_case: + is_bad_config = True + actual_flag = "False" + case_name = "lowercased" + opposite_flag = "True" + + if model_name in cased_models and do_lower_case: + is_bad_config = True + actual_flag = "True" + case_name = "cased" + opposite_flag = "False" + + if is_bad_config: + raise ValueError("You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. " + "However, `%s` seems to be a %s model, so you " + "should pass in `--do_lower_case=%s` so that the fine-tuning matches " + "how the model was pre-training. If this error is wrong, please " + "just comment out this check." 
% + (actual_flag, init_checkpoint, model_name, case_name, opposite_flag)) + + +def convert_to_unicode(text): + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def printable_text(text): + """Returns text encoded in a way suitable for print or `tf.logging`.""" + + # These functions want `str` for both Python2 and Python3, but in one case + # it's a Unicode string and in the other it's a byte string. + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text + elif isinstance(text, unicode): + return text.encode("utf-8") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with open(vocab_file, "r") as reader: + while True: + token = convert_to_unicode(reader.readline()) + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file=default_vocab(), do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + @staticmethod + def convert_tokens_to_string(tokens, clean_up_tokenization_spaces=True): + """ Converts a sequence of tokens (string) in a single string. """ + + def clean_up_tokenization(out_string): + """ Clean up a list of simple English tokenization artifacts + like spaces before punctuations and abreviated forms. 
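+ e.g. "hello , do n't" -> "hello, don't"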
+ """ + out_string = (out_string.replace(" .", ".").replace(" ?", "?").replace(" !", "!").replace( + " ,", + ",").replace(" ' ", + "'").replace(" n't", + "n't").replace(" 'm", + "'m").replace(" 's", + "'s").replace(" 've", + "'ve").replace(" 're", "'re")) + return out_string + + text = ' '.join(tokens).replace(' ##', '').strip() + if clean_up_tokenization_spaces: + clean_text = clean_up_tokenization(text) + return clean_text + else: + return text + + def vocab_size(self): + return len(self.vocab) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
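+ # The hex ranges below cover: CJK Unified Ideographs (4E00-9FFF), Extension A (3400-4DBF),
+ # Extensions B-E (20000-2CEAF), CJK Compatibility Ideographs (F900-FAFF),
+ # and the CJK Compatibility Ideographs Supplement (2F800-2FA1F).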
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Runs WordPiece tokenziation.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. + """ + + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat in ("Cc", "Cf"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. 
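+ # ASCII ranges 33-47, 58-64, 91-96 and 123-126 cover characters such as !"#$%&'()*+,-./ :;<=>?@ [\]^_` {|}~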
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/configuration_bert.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/configuration_bert.py new file mode 100755 index 000000000..323193192 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/configuration_bert.py @@ -0,0 +1,85 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" BERT model configuration """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging + +logger = logging.getLogger(__name__) + + +class BertConfig(object): + r""" + :class:`~transformers.BertConfig` is the configuration class to store the configuration of a + `BertModel`. + + + Arguments: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + layer_norm_eps: The epsilon used by LayerNorm. 
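+ output_attentions: Whether to also return the attention weights of each layer.
+ output_hidden_states: Whether to also return the hidden states of all layers.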
+ """ + + def __init__(self, + vocab_size_or_config_json_file=30522, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02, + layer_norm_eps=1e-12, + output_attentions=False, + output_hidden_states=False): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps + self.output_attentions = output_attentions + self.output_hidden_states = output_hidden_states diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model.py new file mode 100644 index 000000000..cc352e475 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model.py @@ -0,0 +1,247 @@ +from collections import OrderedDict +from typing import Tuple +from typing import Union + +import numpy as np +import paddle +import paddle.nn.functional as F +from disco_diffusion_cnclip_vitb16.cn_clip.clip import _tokenizer +from disco_diffusion_cnclip_vitb16.cn_clip.clip.configuration_bert import BertConfig +from disco_diffusion_cnclip_vitb16.cn_clip.clip.modeling_bert import BertModel +from paddle import nn +from paddle.nn import MultiHeadAttention + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. 
an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else nn.Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU(inplace=True) + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + # downsampling layer is prepended with an avgpool, and the subsequent convolution has stride 1 + self.downsample = nn.Sequential( + OrderedDict([("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))])) + + def forward(self, x: paddle.Tensor): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class QuickGELU(nn.Layer): + + def forward(self, x: paddle.Tensor): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask: paddle.Tensor = None): + super().__init__() + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(*[("c_fc", nn.Linear(d_model, d_model * 4)), ( + "gelu", QuickGELU()), ("c_proj", nn.Linear(d_model * 4, d_model))]) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x: paddle.Tensor): + self.attn_mask = self.attn_mask.to(dtype=x.dtype, device=x.device) if self.attn_mask is not None else None + return self.attn(x, x, x, attn_mask=self.attn_mask) + + def forward(self, x: paddle.Tensor): + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask: paddle.Tensor = None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x: paddle.Tensor): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + scale = width**-0.5 + # self.class_embedding = nn.Parameter(scale * paddle.randn(width)) + class_embedding = self.create_parameter([width]) + self.add_parameter("class_embedding", class_embedding) + # self.positional_embedding = nn.Parameter(scale * paddle.randn([(input_resolution // patch_size) ** 2 + 1, width)]) + positional_embedding = self.create_parameter([(input_resolution // patch_size)**2 + 1, width]) + self.add_parameter("positional_embedding", positional_embedding) + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + # 
self.proj = nn.Parameter(scale * paddle.randn([width, output_dim])) + proj = self.create_parameter([width, output_dim]) + self.add_parameter("proj", proj) + + def forward(self, x: paddle.Tensor): + x = self.conv1(x) # shape = [*, width, grid, grid] + x = x.reshape([x.shape[0], x.shape[1], -1]) # shape = [*, width, grid ** 2] + x = x.transpose([0, 2, 1]) # shape = [*, grid ** 2, width] + x = paddle.concat([self.class_embedding + paddle.zeros([x.shape[0], 1, x.shape[-1]], dtype=x.dtype), x], + axis=1) # shape = [*, grid ** 2 + 1, width] + x = x + paddle.cast(self.positional_embedding, x.dtype) + x = self.ln_pre(x) + + x = self.transformer(x) + + x = self.ln_post(x[:, 0, :]) + + if self.proj is not None: + x = x @ self.proj + + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + vocab_size: int, + text_attention_probs_dropout_prob: float, + text_hidden_act: str, + text_hidden_dropout_prob: float, + text_hidden_size: int, + text_initializer_range: float, + text_intermediate_size: int, + text_max_position_embeddings: int, + text_num_attention_heads: int, + text_num_hidden_layers: int, + text_type_vocab_size: int, + tokenizer=_tokenizer, + ): + super().__init__() + + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.bert_config = BertConfig( + vocab_size_or_config_json_file=vocab_size, + hidden_size=text_hidden_size, + num_hidden_layers=text_num_hidden_layers, + num_attention_heads=text_num_attention_heads, + intermediate_size=text_intermediate_size, + hidden_act=text_hidden_act, + hidden_dropout_prob=text_hidden_dropout_prob, + attention_probs_dropout_prob=text_attention_probs_dropout_prob, + max_position_embeddings=text_max_position_embeddings, + type_vocab_size=text_type_vocab_size, + initializer_range=text_initializer_range, + layer_norm_eps=1e-12, + ) + self.bert = BertModel(self.bert_config) + + text_projection = self.create_parameter([text_hidden_size, embed_dim]) + self.add_parameter("text_projection", text_projection) + logit_scale = self.create_parameter([1]) + self.add_parameter("logit_scale", logit_scale) + + self.tokenizer = tokenizer + + @property + def dtype(self): + return self.visual.conv1.weight.dtype + + def encode_image(self, image): + return self.visual(image.cast(self.dtype)) + + def encode_text(self, text): + pad_index = self.tokenizer.vocab['[PAD]'] + + attn_mask = text.not_equal(paddle.to_tensor(pad_index)).cast(self.dtype) + + x = self.bert(text, attention_mask=attn_mask)[0].cast(self.dtype) # [batch_size, seq_length, hidden_size] + return x[:, 0, :] @ self.text_projection + + def forward(self, image, text): + assert image is not None or text is not None, "text and image cannot both be None!" 
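+ # If only one modality is given, return its features directly; otherwise return the
+ # L2-normalized image and text features together with the logit scale.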
+ + if image is None: + return self.encode_text(text) + elif text is None: + return self.encode_image(image) + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + image_features = image_features / image_features.norm(axis=-1, keepdim=True) + text_features = text_features / text_features.norm(axis=-1, keepdim=True) + + return image_features, text_features, self.logit_scale.exp() + + def get_similarity(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(axis=1, keepdim=True) + text_features = text_features / text_features.norm(axis=1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = logit_scale * image_features @ text_features.t() + logits_per_text = logits_per_image.t() + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-base-chinese.json b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-base-chinese.json new file mode 100755 index 000000000..fdd5bce81 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-base-chinese.json @@ -0,0 +1,13 @@ +{ + "vocab_size": 21128, + "text_attention_probs_dropout_prob": 0.1, + "text_hidden_act": "gelu", + "text_hidden_dropout_prob": 0.1, + "text_hidden_size": 768, + "text_initializer_range": 0.02, + "text_intermediate_size": 3072, + "text_max_position_embeddings": 512, + "text_num_attention_heads": 12, + "text_num_hidden_layers": 12, + "text_type_vocab_size": 2 +} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-large-chinese.json b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-large-chinese.json new file mode 100755 index 000000000..b4ef28998 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/RoBERTa-wwm-ext-large-chinese.json @@ -0,0 +1,13 @@ +{ + "vocab_size": 21128, + "text_attention_probs_dropout_prob": 0.1, + "text_hidden_act": "gelu", + "text_hidden_dropout_prob": 0.1, + "text_hidden_size": 1024, + "text_initializer_range": 0.02, + "text_intermediate_size": 4096, + "text_max_position_embeddings": 512, + "text_num_attention_heads": 16, + "text_num_hidden_layers": 24, + "text_type_vocab_size": 2 +} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-16.json b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-16.json new file mode 100755 index 000000000..4adcbeca7 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-16.json @@ -0,0 +1,7 @@ +{ + "embed_dim": 512, + "image_resolution": 224, + "vision_layers": 12, + "vision_width": 768, + "vision_patch_size": 16 +} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-32.json b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-32.json new file mode 100755 index 000000000..75c98937a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-B-32.json @@ 
-0,0 +1,7 @@ +{ + "embed_dim": 512, + "image_resolution": 224, + "vision_layers": 12, + "vision_width": 768, + "vision_patch_size": 32 +} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-L-14.json b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-L-14.json new file mode 100755 index 000000000..d731eef46 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/model_configs/ViT-L-14.json @@ -0,0 +1,7 @@ +{ + "embed_dim": 768, + "image_resolution": 224, + "vision_layers": 24, + "vision_width": 1024, + "vision_patch_size": 14 +} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/modeling_bert.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/modeling_bert.py new file mode 100755 index 000000000..881352974 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/modeling_bert.py @@ -0,0 +1,450 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""PyTorch BERT model. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import json +import logging +import math +import os +import sys +from io import open + +import paddle +from paddle import nn + +from .configuration_bert import BertConfig + +logger = logging.getLogger(__name__) + + +def gelu(x): + """ Original Implementation of the gelu activation function in Google Bert repo when initially created. + For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): + 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) + Also see https://arxiv.org/abs/1606.08415 + """ + return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0))) + + +def gelu_new(x): + """ Implementation of the gelu activation function currently in Google Bert repo (identical to OpenAI GPT). + Also see https://arxiv.org/abs/1606.08415 + """ + return 0.5 * x * (1 + paddle.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def swish(x): + return x * paddle.nn.functional.sigmoid(x) + + +ACT2FN = {"gelu": gelu, "relu": paddle.nn.functional.relu, "swish": swish, "gelu_new": gelu_new} + +BertLayerNorm = paddle.nn.LayerNorm + + +class BertEmbeddings(nn.Layer): + """Construct the embeddings from word, position and token_type embeddings. 
+ """ + + def __init__(self, config): + super(BertEmbeddings, self).__init__() + self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size) #, padding_idx=0) + self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size) + self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) + + # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load + # any TensorFlow checkpoint file + self.LayerNorm = BertLayerNorm(config.hidden_size, epsilon=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, input_ids, token_type_ids=None, position_ids=None): + seq_length = input_ids.shape[1] + if position_ids is None: + position_ids = paddle.arange(seq_length, dtype='int64') + position_ids = position_ids.unsqueeze(0).expand_as(input_ids) + if token_type_ids is None: + token_type_ids = paddle.zeros_like(input_ids) + + words_embeddings = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + + token_type_embeddings = self.token_type_embeddings(token_type_ids) + + embeddings = words_embeddings + position_embeddings + token_type_embeddings + embeddings = self.LayerNorm(embeddings) + + embeddings = self.dropout(embeddings) + + return embeddings + + +class BertSelfAttention(nn.Layer): + + def __init__(self, config): + super(BertSelfAttention, self).__init__() + if config.hidden_size % config.num_attention_heads != 0: + raise ValueError("The hidden size (%d) is not a multiple of the number of attention " + "heads (%d)" % (config.hidden_size, config.num_attention_heads)) + self.output_attentions = config.output_attentions + + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.shape[:-1] + [self.num_attention_heads, self.attention_head_size] + x = x.reshape(new_x_shape) + return x.transpose([0, 2, 1, 3]) + + def forward(self, hidden_states, attention_mask=None, head_mask=None): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(hidden_states) + mixed_value_layer = self.value(hidden_states) + + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. + attention_scores = paddle.matmul(query_layer, key_layer.transpose([0, 1, 3, 2])) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. + attention_probs = nn.Softmax(axis=-1)(attention_scores) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = paddle.matmul(attention_probs, value_layer) + + context_layer = context_layer.transpose([0, 2, 1, 3]) + new_context_layer_shape = context_layer.shape[:-2] + [self.all_head_size] + context_layer = context_layer.reshape(new_context_layer_shape) + + outputs = (context_layer, attention_probs) if self.output_attentions else (context_layer, ) + return outputs + + +class BertSelfOutput(nn.Layer): + + def __init__(self, config): + super(BertSelfOutput, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = BertLayerNorm(config.hidden_size, epsilon=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertAttention(nn.Layer): + + def __init__(self, config): + super(BertAttention, self).__init__() + self.self = BertSelfAttention(config) + self.output = BertSelfOutput(config) + self.pruned_heads = set() + + def forward(self, input_tensor, attention_mask=None, head_mask=None): + self_outputs = self.self(input_tensor, attention_mask, head_mask) + attention_output = self.output(self_outputs[0], input_tensor) + outputs = (attention_output, ) + self_outputs[1:] # add attentions if we output them + return outputs + + +class BertIntermediate(nn.Layer): + + def __init__(self, config): + super(BertIntermediate, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): + self.intermediate_act_fn = ACT2FN[config.hidden_act] + else: + self.intermediate_act_fn = config.hidden_act + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class BertOutput(nn.Layer): + + def __init__(self, config): + super(BertOutput, self).__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = BertLayerNorm(config.hidden_size, epsilon=config.layer_norm_eps) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertLayer(nn.Layer): + + def __init__(self, config): + super(BertLayer, self).__init__() + self.attention = BertAttention(config) + self.intermediate = BertIntermediate(config) + self.output = BertOutput(config) + + def forward(self, hidden_states, attention_mask=None, head_mask=None): + attention_outputs = self.attention(hidden_states, attention_mask, head_mask) + attention_output = attention_outputs[0] + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + outputs = (layer_output, ) + attention_outputs[1:] # add attentions if we output them + return outputs + + +class BertEncoder(nn.Layer): + + def __init__(self, config): + super(BertEncoder, self).__init__() + self.output_attentions = config.output_attentions + self.output_hidden_states = 
config.output_hidden_states + self.layer = nn.LayerList([BertLayer(config) for _ in range(config.num_hidden_layers)]) + + def forward(self, hidden_states, attention_mask=None, head_mask=None): + all_hidden_states = () + all_attentions = () + for i, layer_module in enumerate(self.layer): + if self.output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states, ) + + layer_outputs = layer_module(hidden_states, attention_mask, head_mask[i]) + hidden_states = layer_outputs[0] + + if self.output_attentions: + all_attentions = all_attentions + (layer_outputs[1], ) + # Add last layer + if self.output_hidden_states: + all_hidden_states = all_hidden_states + (hidden_states, ) + + outputs = (hidden_states, ) + if self.output_hidden_states: + outputs = outputs + (all_hidden_states, ) + if self.output_attentions: + outputs = outputs + (all_attentions, ) + return outputs # last-layer hidden state, (all hidden states), (all attentions) + + +class BertPooler(nn.Layer): + + def __init__(self, config): + super(BertPooler, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + # We "pool" the model by simply taking the hidden state corresponding + # to the first token. + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class BertPredictionHeadTransform(nn.Layer): + + def __init__(self, config): + super(BertPredictionHeadTransform, self).__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + if isinstance(config.hidden_act, str) or (sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)): + self.transform_act_fn = ACT2FN[config.hidden_act] + else: + self.transform_act_fn = config.hidden_act + self.LayerNorm = BertLayerNorm(config.hidden_size, epsilon=config.layer_norm_eps) + + def forward(self, hidden_states): + hidden_states = self.dense(hidden_states) + hidden_states = self.transform_act_fn(hidden_states) + hidden_states = self.LayerNorm(hidden_states) + return hidden_states + + +class BertLMPredictionHead(nn.Layer): + + def __init__(self, config): + super(BertLMPredictionHead, self).__init__() + self.transform = BertPredictionHeadTransform(config) + + # The output weights are the same as the input embeddings, but there is + # an output-only bias for each token. 
+ self.decoder = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + self.bias = nn.Parameter(paddle.zeros(config.vocab_size)) + + def forward(self, hidden_states): + hidden_states = self.transform(hidden_states) + hidden_states = self.decoder(hidden_states) + self.bias + return hidden_states + + +class BertOnlyMLMHead(nn.Layer): + + def __init__(self, config): + super(BertOnlyMLMHead, self).__init__() + self.predictions = BertLMPredictionHead(config) + + def forward(self, sequence_output): + prediction_scores = self.predictions(sequence_output) + return prediction_scores + + +class BertOnlyNSPHead(nn.Layer): + + def __init__(self, config): + super(BertOnlyNSPHead, self).__init__() + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, pooled_output): + seq_relationship_score = self.seq_relationship(pooled_output) + return seq_relationship_score + + +class BertPreTrainingHeads(nn.Layer): + + def __init__(self, config): + super(BertPreTrainingHeads, self).__init__() + self.predictions = BertLMPredictionHead(config) + self.seq_relationship = nn.Linear(config.hidden_size, 2) + + def forward(self, sequence_output, pooled_output): + prediction_scores = self.predictions(sequence_output) + seq_relationship_score = self.seq_relationship(pooled_output) + return prediction_scores, seq_relationship_score + + +class BertPreTrainedModel(nn.Layer): + config_class = BertConfig + base_model_prefix = "bert" + + def __init__(self, config): + super(BertPreTrainedModel, self).__init__() + self.config = config + + +class BertModel(BertPreTrainedModel): + r""" + Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs: + **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)`` + Sequence of hidden-states at the output of the last layer of the model. + **pooler_output**: ``torch.FloatTensor`` of shape ``(batch_size, hidden_size)`` + Last layer hidden-state of the first token of the sequence (classification token) + further processed by a Linear layer and a Tanh activation function. The Linear + layer weights are trained from the next sentence prediction (classification) + objective during Bert pretraining. This output is usually *not* a good summary + of the semantic content of the input, you're often better with averaging or pooling + the sequence of hidden-states for the whole input sequence. + **hidden_states**: (`optional`, returned when ``config.output_hidden_states=True``) + list of ``torch.FloatTensor`` (one for the output of each layer + the output of the embeddings) + of shape ``(batch_size, sequence_length, hidden_size)``: + Hidden-states of the model at the output of each layer plus the initial embedding outputs. + **attentions**: (`optional`, returned when ``config.output_attentions=True``) + list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``: + Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. 
+ + Examples:: + + tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + model = BertModel.from_pretrained('bert-base-uncased') + input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute")).unsqueeze(0) # Batch size 1 + outputs = model(input_ids) + last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple + + """ + + def __init__(self, config): + super(BertModel, self).__init__(config) + + self.embeddings = BertEmbeddings(config) + self.encoder = BertEncoder(config) + self.pooler = BertPooler(config) + + def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None): + if attention_mask is None: + attention_mask = paddle.ones_like(input_ids) + if token_type_ids is None: + token_type_ids = paddle.zeros_like(input_ids) + + # We create a 3D attention mask from a 2D tensor mask. + # Sizes are [batch_size, 1, 1, to_seq_length] + # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length] + # this attention mask is more simple than the triangular masking of causal attention + # used in OpenAI GPT, we just need to prepare the broadcast dimension here. + extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2) + + # Since attention_mask is 1.0 for positions we want to attend and 0.0 for + # masked positions, this operation will create a tensor which is 0.0 for + # positions we want to attend and -10000.0 for masked positions. + # Since we are adding it to the raw scores before the softmax, this is + # effectively the same as removing these entirely. + extended_attention_mask = extended_attention_mask.cast(dtype=self.parameters()[0].dtype) # fp16 compatibility + extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0 + + # Prepare head mask if needed + # 1.0 in head_mask indicate we keep the head + # attention_probs has shape bsz x n_heads x N x N + # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] + # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] + if head_mask is not None: + if head_mask.rank() == 1: + head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1) + head_mask = head_mask.expand(self.config.num_hidden_layers, -1, -1, -1, -1) + elif head_mask.rank() == 2: + head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze( + -1) # We can specify head_mask for each layer + head_mask = head_mask.cast(dtype=self.parameters()[0].dtype) # switch to fload if need + fp16 compatibility + else: + head_mask = [None] * self.config.num_hidden_layers + + embedding_output = self.embeddings(input_ids, position_ids=position_ids, token_type_ids=token_type_ids) + + encoder_outputs = self.encoder(embedding_output, extended_attention_mask, head_mask=head_mask) + + sequence_output = encoder_outputs[0] + pooled_output = self.pooler(sequence_output) + + outputs = ( + sequence_output, + pooled_output, + ) + encoder_outputs[1:] # add hidden_states and attentions if they are here + return outputs # sequence_output, pooled_output, (hidden_states), (attentions) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py new file mode 100755 index 000000000..06607c51f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py @@ -0,0 +1,74 @@ +# Code modified from https://github.com/openai/CLIP +import json +import os +from 
pathlib import Path +from typing import List +from typing import Union + +import paddle +from disco_diffusion_cnclip_vitb16.cn_clip.clip import _tokenizer +from disco_diffusion_cnclip_vitb16.cn_clip.clip.model import CLIP +from tqdm import tqdm + +__all__ = ["tokenize", "create_model", "available_models"] + +_MODEL_INFO = {"ViTB16": {"struct": "ViT-B-16@RoBERTa-wwm-ext-base-chinese", "input_resolution": 224}} + + +def available_models() -> List[str]: + """Returns the names of available CLIP models""" + return list(_MODEL_INFO.keys()) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 64): + """ + Returns the tokenized representation of given input string(s) + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + context_length : int + The context length to use; all baseline models use 24 as the context length + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + all_tokens = [] + for text in texts: + all_tokens.append([_tokenizer.vocab['[CLS]']] + + _tokenizer.convert_tokens_to_ids(_tokenizer.tokenize(text))[:context_length - 2] + + [_tokenizer.vocab['[SEP]']]) + + result = paddle.zeros([len(all_tokens), context_length], dtype='int64') + + for i, tokens in enumerate(all_tokens): + assert len(tokens) <= context_length + result[i, :len(tokens)] = paddle.to_tensor(tokens) + + return result + + +def create_model(name): + checkpoint = paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', '{}.pdparams'.format(name))) + model_name = _MODEL_INFO[name]['struct'] + vision_model, text_model = model_name.split('@') + # Initialize the model. + vision_model_config_file = Path(__file__).parent / f"model_configs/{vision_model.replace('/', '-')}.json" + print('Loading vision model config from', vision_model_config_file) + assert os.path.exists(vision_model_config_file) + + text_model_config_file = Path(__file__).parent / f"model_configs/{text_model.replace('/', '-')}.json" + print('Loading text model config from', text_model_config_file) + assert os.path.exists(text_model_config_file) + + with open(vision_model_config_file, 'r') as fv, open(text_model_config_file, 'r') as ft: + model_info = json.load(fv) + for k, v in json.load(ft).items(): + model_info[k] = v + + model = CLIP(**model_info) + model.set_state_dict(checkpoint) + return model diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py new file mode 100755 index 000000000..806135c16 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py @@ -0,0 +1,435 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
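The `tokenize` helper defined in `cn_clip/clip/utils.py` above pads each prompt into a fixed-length `int64` tensor bracketed by `[CLS]`/`[SEP]`, and `create_model` assembles the CLIP network from the JSON configs and pretrained weights. A minimal usage sketch, assuming the package is importable, the `ViTB16.pdparams` weights are already under `pre_trained/`, and the ported `CLIP` class keeps the original CN-CLIP `encode_text` method:

```python
import paddle
from disco_diffusion_cnclip_vitb16.cn_clip.clip.utils import (available_models, create_model, tokenize)

print(available_models())  # ['ViTB16']

# Tokenize a Chinese prompt into a [1, 64] int64 tensor ([CLS] ... [SEP], zero-padded).
tokens = tokenize("一座灯塔在血色海洋上发光", context_length=64)
print(tokens.shape)  # [1, 64]

# Build the CLIP model from the configs and pretrained weights loaded by create_model.
model = create_model("ViTB16")
model.eval()
with paddle.no_grad():
    # encode_text is assumed to follow the upstream CN-CLIP interface.
    text_features = model.encode_text(tokens)
```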
+import argparse +import ast +import os +import sys +from functools import partial +from typing import List +from typing import Optional + +import paddle +from disco_diffusion_cnclip_vitb16 import resize_right +from disco_diffusion_cnclip_vitb16.reverse_diffusion import create + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="disco_diffusion_cnclip_vitb16", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class DiscoDiffusionClip: + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 0, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 0, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 1, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + use_gpu: Optional[bool] = True, + output_dir: Optional[str] = 'disco_diffusion_cnclip_vitb16_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. 
To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. 
If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. 
+ :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. 
Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param use_gpu: whether to use gpu or not. + :return: a DocumentArray object that has `n_batches` Documents + """ + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",由{}所作".format(artist) + elif isinstance(text_prompts, list): + text_prompts[0] = text_prompts[0].rstrip(',.,。') + if style is not None: + text_prompts[0] += ",{}".format(style) + if artist is not None: + text_prompts[0] += ",由{}所作".format(artist) + + return create(text_prompts=text_prompts, + init_image=init_image, + width_height=width_height, + skip_steps=skip_steps, + steps=steps, + cut_ic_pow=cut_ic_pow, + init_scale=init_scale, + clip_guidance_scale=clip_guidance_scale, + tv_scale=tv_scale, + range_scale=range_scale, + sat_scale=sat_scale, + cutn_batches=cutn_batches, + diffusion_sampling_mode=diffusion_sampling_mode, + perlin_init=perlin_init, + perlin_mode=perlin_mode, + seed=seed, + eta=eta, + clamp_grad=clamp_grad, + clamp_max=clamp_max, + randomize_class=randomize_class, + clip_denoised=clip_denoised, + fuzzy_prompt=fuzzy_prompt, + rand_mag=rand_mag, + cut_overview=cut_overview, + cut_innercut=cut_innercut, + cut_icgray_p=cut_icgray_p, + display_rate=display_rate, + n_batches=n_batches, + batch_size=batch_size, + batch_name=batch_name, + clip_models=['ViTB16'], + output_dir=output_dir) + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = [] + for text_prompt in text_prompts: + result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() + results.append(result) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + init_image=args.init_image, + width_height=args.width_height, + skip_steps=args.skip_steps, + steps=args.steps, + cut_ic_pow=args.cut_ic_pow, + init_scale=args.init_scale, + clip_guidance_scale=args.clip_guidance_scale, + tv_scale=args.tv_scale, + range_scale=args.range_scale, + sat_scale=args.sat_scale, + cutn_batches=args.cutn_batches, + diffusion_sampling_mode=args.diffusion_sampling_mode, + perlin_init=args.perlin_init, + perlin_mode=args.perlin_mode, + seed=args.seed, + eta=args.eta, + clamp_grad=args.clamp_grad, + clamp_max=args.clamp_max, + randomize_class=args.randomize_class, + clip_denoised=args.clip_denoised, + fuzzy_prompt=args.fuzzy_prompt, + rand_mag=args.rand_mag, + cut_overview=args.cut_overview, + cut_innercut=args.cut_innercut, + cut_icgray_p=args.cut_icgray_p, + display_rate=args.display_rate, + n_batches=args.n_batches, + batch_size=args.batch_size, + batch_name=args.batch_name, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_input_group.add_argument( + '--skip_steps', + type=int, + default=0, + help= + 'Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15%% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50%% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture' + ) + self.arg_input_group.add_argument( + '--steps', + type=int, + default=250, + help= + "When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time." + ) + self.arg_input_group.add_argument( + '--cut_ic_pow', + type=int, + default=1, + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--init_scale', + type=int, + default=1000, + help= + "This controls how strongly CLIP will try to match the init_image provided. 
This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost." + ) + self.arg_input_group.add_argument( + '--clip_guidance_scale', + type=int, + default=5000, + help= + "CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well." + ) + self.arg_input_group.add_argument( + '--tv_scale', + type=int, + default=0, + help= + "Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising" + ) + self.arg_input_group.add_argument( + '--range_scale', + type=int, + default=0, + help= + "Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images." + ) + self.arg_input_group.add_argument( + '--sat_scale', + type=int, + default=0, + help= + "Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation." + ) + self.arg_input_group.add_argument( + '--cutn_batches', + type=int, + default=4, + help= + "Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. 
DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below." + ) + self.arg_input_group.add_argument( + '--diffusion_sampling_mode', + type=str, + default='ddim', + help= + "Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord." + ) + self.arg_input_group.add_argument( + '--perlin_init', + type=bool, + default=False, + help= + "Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively." + ) + self.arg_input_group.add_argument( + '--perlin_mode', + type=str, + default='mixed', + help= + "sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects." + ) + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical." + ) + self.arg_input_group.add_argument( + '--eta', + type=float, + default=0.8, + help= + "eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects." 
+ ) + self.arg_input_group.add_argument( + '--clamp_grad', + type=bool, + default=True, + help= + "As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced." + ) + self.arg_input_group.add_argument( + '--clamp_max', + type=float, + default=0.05, + help= + "Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy." + ) + self.arg_input_group.add_argument('--randomize_class', type=bool, default=True, help="Random class.") + self.arg_input_group.add_argument('--clip_denoised', type=bool, default=False, help="Clip denoised.") + self.arg_input_group.add_argument( + '--fuzzy_prompt', + type=bool, + default=False, + help= + "Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this." + ) + self.arg_input_group.add_argument( + '--rand_mag', + type=float, + default=0.5, + help="Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.") + self.arg_input_group.add_argument('--cut_overview', + type=str, + default='[12]*400+[4]*600', + help="The schedule of overview cuts") + self.arg_input_group.add_argument('--cut_innercut', + type=str, + default='[4]*400+[12]*600', + help="The schedule of inner cuts") + self.arg_input_group.add_argument( + '--cut_icgray_p', + type=str, + default='[0.2]*400+[0]*600', + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help= + "During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly." + ) + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='disco_diffusion_cnclip_vitb16', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--text_prompts', type=str) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument( + '--init_image', + type=str, + default=None, + help= + "Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion." + ) + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[1280, 768], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--n_batches', + type=int, + default=1, + help= + "This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings." + ) + self.arg_input_group.add_argument('--batch_size', type=int, default=1, help="Batch size.") + self.arg_input_group.add_argument( + '--batch_name', + type=str, + default='', + help= + 'The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name.' + ) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/requirements.txt b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/requirements.txt new file mode 100755 index 000000000..8b4bc0ea4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/requirements.txt @@ -0,0 +1,8 @@ +numpy +paddle_lpips==0.1.2 +ftfy +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/README.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/README.md new file mode 100755 index 000000000..1f8d0bb0a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/README.md @@ -0,0 +1,3 @@ +# ResizeRight (Paddle) +Fully differentiable resize function implemented by Paddle. +This module is based on [assafshocher/ResizeRight](https://github.com/assafshocher/ResizeRight). 
diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/__init__.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/interp_methods.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/interp_methods.py new file mode 100755 index 000000000..276eb055a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/interp_methods.py @@ -0,0 +1,70 @@ +from math import pi + +try: + import paddle +except ImportError: + paddle = None + +try: + import numpy + import numpy as np +except ImportError: + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def set_framework_dependencies(x): + if type(x) is numpy.ndarray: + to_dtype = lambda a: a + fw = numpy + else: + to_dtype = lambda a: paddle.cast(a, x.dtype) + fw = paddle + # eps = fw.finfo(fw.float32).eps + eps = paddle.to_tensor(np.finfo(np.float32).eps) + return fw, to_dtype, eps + + +def support_sz(sz): + + def wrapper(f): + f.support_sz = sz + return f + + return wrapper + + +@support_sz(4) +def cubic(x): + fw, to_dtype, eps = set_framework_dependencies(x) + absx = fw.abs(x) + absx2 = absx**2 + absx3 = absx**3 + return ((1.5 * absx3 - 2.5 * absx2 + 1.) * to_dtype(absx <= 1.) + + (-0.5 * absx3 + 2.5 * absx2 - 4. * absx + 2.) * to_dtype((1. < absx) & (absx <= 2.))) + + +@support_sz(4) +def lanczos2(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 2) + eps) / ((pi**2 * x**2 / 2) + eps)) * to_dtype(abs(x) < 2)) + + +@support_sz(6) +def lanczos3(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 3) + eps) / ((pi**2 * x**2 / 3) + eps)) * to_dtype(abs(x) < 3)) + + +@support_sz(2) +def linear(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return ((x + 1) * to_dtype((-1 <= x) & (x < 0)) + (1 - x) * to_dtype((0 <= x) & (x <= 1))) + + +@support_sz(1) +def box(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return to_dtype((-1 <= x) & (x < 0)) + to_dtype((0 <= x) & (x <= 1)) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/resize_right.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/resize_right.py new file mode 100755 index 000000000..d8bab5b81 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/resize_right/resize_right.py @@ -0,0 +1,403 @@ +import warnings +from fractions import Fraction +from math import ceil +from typing import Tuple + +import disco_diffusion_cnclip_vitb16.resize_right.interp_methods as interp_methods + + +class NoneClass: + pass + + +try: + import paddle + from paddle import nn + nnModuleWrapped = nn.Layer +except ImportError: + warnings.warn('No PyTorch found, will work only with Numpy') + paddle = None + nnModuleWrapped = NoneClass + +try: + import numpy + import numpy as np +except ImportError: + warnings.warn('No Numpy found, will work only with PyTorch') + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def resize(input, + scale_factors=None, + out_shape=None, + interp_method=interp_methods.cubic, + support_sz=None, + antialiasing=True, + by_convs=False, + scale_tolerance=None, + max_numerator=10, + 
pad_mode='constant'): + # get properties of the input tensor + in_shape, n_dims = input.shape, input.ndim + + # fw stands for framework that can be either numpy or paddle, + # determined by the input type + fw = numpy if type(input) is numpy.ndarray else paddle + eps = np.finfo(np.float32).eps if fw == numpy else paddle.to_tensor(np.finfo(np.float32).eps) + device = input.place if fw is paddle else None + + # set missing scale factors or output shapem one according to another, + # scream if both missing. this is also where all the defults policies + # take place. also handling the by_convs attribute carefully. + scale_factors, out_shape, by_convs = set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, + scale_tolerance, max_numerator, eps, fw) + + # sort indices of dimensions according to scale of each dimension. + # since we are going dim by dim this is efficient + sorted_filtered_dims_and_scales = [(dim, scale_factors[dim], by_convs[dim], in_shape[dim], out_shape[dim]) + for dim in sorted(range(n_dims), key=lambda ind: scale_factors[ind]) + if scale_factors[dim] != 1.] + # unless support size is specified by the user, it is an attribute + # of the interpolation method + if support_sz is None: + support_sz = interp_method.support_sz + + # output begins identical to input and changes with each iteration + output = input + + # iterate over dims + for (dim, scale_factor, dim_by_convs, in_sz, out_sz) in sorted_filtered_dims_and_scales: + # STEP 1- PROJECTED GRID: The non-integer locations of the projection + # of output pixel locations to the input tensor + projected_grid = get_projected_grid(in_sz, out_sz, scale_factor, fw, dim_by_convs, device) + + # STEP 1.5: ANTIALIASING- If antialiasing is taking place, we modify + # the window size and the interpolation method (see inside function) + cur_interp_method, cur_support_sz = apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, + antialiasing) + + # STEP 2- FIELDS OF VIEW: for each output pixels, map the input pixels + # that influence it. Also calculate needed padding and update grid + # accoedingly + field_of_view = get_field_of_view(projected_grid, cur_support_sz, fw, eps, device) + + # STEP 2.5- CALCULATE PAD AND UPDATE: according to the field of view, + # the input should be padded to handle the boundaries, coordinates + # should be updated. actual padding only occurs when weights are + # aplied (step 4). if using by_convs for this dim, then we need to + # calc right and left boundaries for each filter instead. + pad_sz, projected_grid, field_of_view = calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, + dim_by_convs, fw, device) + # STEP 3- CALCULATE WEIGHTS: Match a set of weights to the pixels in + # the field of view for each output pixel + weights = get_weights(cur_interp_method, projected_grid, field_of_view) + + # STEP 4- APPLY WEIGHTS: Each output pixel is calculated by multiplying + # its set of weights with the pixel values in its field of view. + # We now multiply the fields of view with their matching weights. + # We do this by tensor multiplication and broadcasting. + # if by_convs is true for this dim, then we do this action by + # convolutions. this is equivalent but faster. 
+ if not dim_by_convs: + output = apply_weights(output, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw) + else: + output = apply_convs(output, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw) + return output + + +def get_projected_grid(in_sz, out_sz, scale_factor, fw, by_convs, device=None): + # we start by having the ouput coordinates which are just integer locations + # in the special case when usin by_convs, we only need two cycles of grid + # points. the first and last. + grid_sz = out_sz if not by_convs else scale_factor.numerator + out_coordinates = fw_arange(grid_sz, fw, device) + + # This is projecting the ouput pixel locations in 1d to the input tensor, + # as non-integer locations. + # the following fomrula is derived in the paper + # "From Discrete to Continuous Convolutions" by Shocher et al. + return (out_coordinates / float(scale_factor) + (in_sz - 1) / 2 - (out_sz - 1) / (2 * float(scale_factor))) + + +def get_field_of_view(projected_grid, cur_support_sz, fw, eps, device): + # for each output pixel, map which input pixels influence it, in 1d. + # we start by calculating the leftmost neighbor, using half of the window + # size (eps is for when boundary is exact int) + left_boundaries = fw_ceil(projected_grid - cur_support_sz / 2 - eps, fw) + + # then we simply take all the pixel centers in the field by counting + # window size pixels from the left boundary + ordinal_numbers = fw_arange(ceil(cur_support_sz - eps), fw, device) + return left_boundaries[:, None] + ordinal_numbers + + +def calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, dim_by_convs, fw, device): + if not dim_by_convs: + # determine padding according to neighbor coords out of bound. + # this is a generalized notion of padding, when pad<0 it means crop + pad_sz = [-field_of_view[0, 0].item(), field_of_view[-1, -1].item() - in_sz + 1] + + # since input image will be changed by padding, coordinates of both + # field_of_view and projected_grid need to be updated + field_of_view += pad_sz[0] + projected_grid += pad_sz[0] + + else: + # only used for by_convs, to calc the boundaries of each filter the + # number of distinct convolutions is the numerator of the scale factor + num_convs, stride = scale_factor.numerator, scale_factor.denominator + + # calculate left and right boundaries for each conv. left can also be + # negative right can be bigger than in_sz. such cases imply padding if + # needed. however if# both are in-bounds, it means we need to crop, + # practically apply the conv only on part of the image. + left_pads = -field_of_view[:, 0] + + # next calc is tricky, explanation by rows: + # 1) counting output pixels between the first position of each filter + # to the right boundary of the input + # 2) dividing it by number of filters to count how many 'jumps' + # each filter does + # 3) multiplying by the stride gives us the distance over the input + # coords done by all these jumps for each filter + # 4) to this distance we add the right boundary of the filter when + # placed in its leftmost position. so now we get the right boundary + # of that filter in input coord. + # 5) the padding size needed is obtained by subtracting the rightmost + # input coordinate. if the result is positive padding is needed. if + # negative then negative padding means shaving off pixel columns. 
+ right_pads = (((out_sz - fw_arange(num_convs, fw, device) - 1) # (1) + // num_convs) # (2) + * stride # (3) + + field_of_view[:, -1] # (4) + - in_sz + 1) # (5) + + # in the by_convs case pad_sz is a list of left-right pairs. one per + # each filter + + pad_sz = list(zip(left_pads, right_pads)) + + return pad_sz, projected_grid, field_of_view + + +def get_weights(interp_method, projected_grid, field_of_view): + # the set of weights per each output pixels is the result of the chosen + # interpolation method applied to the distances between projected grid + # locations and the pixel-centers in the field of view (distances are + # directed, can be positive or negative) + weights = interp_method(projected_grid[:, None] - field_of_view) + + # we now carefully normalize the weights to sum to 1 per each output pixel + sum_weights = weights.sum(1, keepdim=True) + sum_weights[sum_weights == 0] = 1 + return weights / sum_weights + + +def apply_weights(input, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw): + # for this operation we assume the resized dim is the first one. + # so we transpose and will transpose back after multiplying + tmp_input = fw_swapaxes(input, dim, 0, fw) + + # apply padding + tmp_input = fw_pad(tmp_input, fw, pad_sz, pad_mode) + + # field_of_view is a tensor of order 2: for each output (1d location + # along cur dim)- a list of 1d neighbors locations. + # note that this whole operations is applied to each dim separately, + # this is why it is all in 1d. + # neighbors = tmp_input[field_of_view] is a tensor of order image_dims+1: + # for each output pixel (this time indicated in all dims), these are the + # values of the neighbors in the 1d field of view. note that we only + # consider neighbors along the current dim, but such set exists for every + # multi-dim location, hence the final tensor order is image_dims+1. + paddle.device.cuda.empty_cache() + neighbors = tmp_input[field_of_view] + + # weights is an order 2 tensor: for each output location along 1d- a list + # of weights matching the field of view. we augment it with ones, for + # broadcasting, so that when multiplies some tensor the weights affect + # only its first dim. + tmp_weights = fw.reshape(weights, (*weights.shape, *[1] * (n_dims - 1))) + + # now we simply multiply the weights with the neighbors, and then sum + # along the field of view, to get a single value per out pixel + tmp_output = (neighbors * tmp_weights).sum(1) + # we transpose back the resized dim to its original position + return fw_swapaxes(tmp_output, 0, dim, fw) + + +def apply_convs(input, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw): + # for this operations we assume the resized dim is the last one. + # so we transpose and will transpose back after multiplying + input = fw_swapaxes(input, dim, -1, fw) + + # the stride for all convs is the denominator of the scale factor + stride, num_convs = scale_factor.denominator, scale_factor.numerator + + # prepare an empty tensor for the output + tmp_out_shape = list(input.shape) + tmp_out_shape[-1] = out_sz + tmp_output = fw_empty(tuple(tmp_out_shape), fw, input.device) + + # iterate over the conv operations. we have as many as the numerator + # of the scale-factor. for each we need boundaries and a filter. + for conv_ind, (pad_sz, filt) in enumerate(zip(pad_sz, weights)): + # apply padding (we pad last dim, padding can be negative) + pad_dim = input.ndim - 1 + tmp_input = fw_pad(input, fw, pad_sz, pad_mode, dim=pad_dim) + + # apply convolution over last dim. 
store in the output tensor with + # positional strides so that when the loop is comlete conv results are + # interwind + tmp_output[..., conv_ind::num_convs] = fw_conv(tmp_input, filt, stride) + + return fw_swapaxes(tmp_output, -1, dim, fw) + + +def set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, scale_tolerance, max_numerator, eps, fw): + # eventually we must have both scale-factors and out-sizes for all in/out + # dims. however, we support many possible partial arguments + if scale_factors is None and out_shape is None: + raise ValueError("either scale_factors or out_shape should be " + "provided") + if out_shape is not None: + # if out_shape has less dims than in_shape, we defaultly resize the + # first dims for numpy and last dims for paddle + out_shape = (list(out_shape) + + list(in_shape[len(out_shape):]) if fw is numpy else list(in_shape[:-len(out_shape)]) + + list(out_shape)) + if scale_factors is None: + # if no scale given, we calculate it as the out to in ratio + # (not recomended) + scale_factors = [out_sz / in_sz for out_sz, in_sz in zip(out_shape, in_shape)] + if scale_factors is not None: + # by default, if a single number is given as scale, we assume resizing + # two dims (most common are images with 2 spatial dims) + scale_factors = (scale_factors if isinstance(scale_factors, (list, tuple)) else [scale_factors, scale_factors]) + # if less scale_factors than in_shape dims, we defaultly resize the + # first dims for numpy and last dims for paddle + scale_factors = (list(scale_factors) + [1] * (len(in_shape) - len(scale_factors)) if fw is numpy else [1] * + (len(in_shape) - len(scale_factors)) + list(scale_factors)) + if out_shape is None: + # when no out_shape given, it is calculated by multiplying the + # scale by the in_shape (not recomended) + out_shape = [ceil(scale_factor * in_sz) for scale_factor, in_sz in zip(scale_factors, in_shape)] + # next part intentionally after out_shape determined for stability + # we fix by_convs to be a list of truth values in case it is not + if not isinstance(by_convs, (list, tuple)): + by_convs = [by_convs] * len(out_shape) + + # next loop fixes the scale for each dim to be either frac or float. + # this is determined by by_convs and by tolerance for scale accuracy. + for ind, (sf, dim_by_convs) in enumerate(zip(scale_factors, by_convs)): + # first we fractionaize + if dim_by_convs: + frac = Fraction(1 / sf).limit_denominator(max_numerator) + frac = Fraction(numerator=frac.denominator, denominator=frac.numerator) + + # if accuracy is within tolerance scale will be frac. if not, then + # it will be float and the by_convs attr will be set false for + # this dim + if scale_tolerance is None: + scale_tolerance = eps + if dim_by_convs and abs(frac - sf) < scale_tolerance: + scale_factors[ind] = frac + else: + scale_factors[ind] = float(sf) + by_convs[ind] = False + + return scale_factors, out_shape, by_convs + + +def apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, antialiasing): + # antialiasing is "stretching" the field of view according to the scale + # factor (only for downscaling). this is low-pass filtering. this + # requires modifying both the interpolation (stretching the 1d + # function and multiplying by the scale-factor) and the window size. 
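+    # concretely, for downscaling (scale_factor < 1) the kernel k is replaced by
+    # k_aa(x) = scale_factor * k(scale_factor * x) and its support is widened to
+    # support_sz / scale_factor, which acts as a low-pass filter before resampling.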
+ scale_factor = float(scale_factor) + if scale_factor >= 1.0 or not antialiasing: + return interp_method, support_sz + cur_interp_method = (lambda arg: scale_factor * interp_method(scale_factor * arg)) + cur_support_sz = support_sz / scale_factor + return cur_interp_method, cur_support_sz + + +def fw_ceil(x, fw): + if fw is numpy: + return fw.int_(fw.ceil(x)) + else: + return paddle.cast(x.ceil(), dtype='int64') + + +def fw_floor(x, fw): + if fw is numpy: + return fw.int_(fw.floor(x)) + else: + return paddle.cast(x.floor(), dtype='int64') + + +def fw_cat(x, fw): + if fw is numpy: + return fw.concatenate(x) + else: + return fw.concat(x) + + +def fw_swapaxes(x, ax_1, ax_2, fw): + if fw is numpy: + return fw.swapaxes(x, ax_1, ax_2) + else: + if ax_1 == -1: + ax_1 = len(x.shape) - 1 + if ax_2 == -1: + ax_2 = len(x.shape) - 1 + perm0 = list(range(len(x.shape))) + temp = ax_1 + perm0[temp] = ax_2 + perm0[ax_2] = temp + return fw.transpose(x, perm0) + + +def fw_pad(x, fw, pad_sz, pad_mode, dim=0): + if pad_sz == (0, 0): + return x + if fw is numpy: + pad_vec = [(0, 0)] * x.ndim + pad_vec[dim] = pad_sz + return fw.pad(x, pad_width=pad_vec, mode=pad_mode) + else: + if x.ndim < 3: + x = x[None, None, ...] + + pad_vec = [0] * ((x.ndim - 2) * 2) + pad_vec[0:2] = pad_sz + return fw_swapaxes(fw.nn.functional.pad(fw_swapaxes(x, dim, -1, fw), pad=pad_vec, mode=pad_mode), dim, -1, fw) + + +def fw_conv(input, filter, stride): + # we want to apply 1d conv to any nd array. the way to do it is to reshape + # the input to a 4D tensor. first two dims are singeletons, 3rd dim stores + # all the spatial dims that we are not convolving along now. then we can + # apply conv2d with a 1xK filter. This convolves the same way all the other + # dims stored in the 3d dim. like depthwise conv over these. + # TODO: numpy support + reshaped_input = input.reshape(1, 1, -1, input.shape[-1]) + reshaped_output = paddle.nn.functional.conv2d(reshaped_input, filter.view(1, 1, 1, -1), stride=(1, stride)) + return reshaped_output.reshape(*input.shape[:-1], -1) + + +def fw_arange(upper_bound, fw, device): + if fw is numpy: + return fw.arange(upper_bound) + else: + return fw.arange(upper_bound) + + +def fw_empty(shape, fw, device): + if fw is numpy: + return fw.empty(shape) + else: + return fw.empty(shape=shape) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/README.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/README.md new file mode 100644 index 000000000..711671bad --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/README.md @@ -0,0 +1,2 @@ +# Diffusion model (Paddle) +This module implements diffusion model which accepts a text prompt and outputs images semantically close to the text. The code is rewritten by Paddle, and mainly refer to two projects: jina-ai/discoart[https://github.com/jina-ai/discoart] and openai/guided-diffusion[https://github.com/openai/guided-diffusion]. Thanks for their wonderful work. 
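Since `resize_right.resize` above dispatches between NumPy and Paddle based on the input type and resizes dimension by dimension, a short sketch of resizing a Paddle tensor may help. Shapes and scale factors are illustrative, and the import path follows the package layout added in this patch:

```python
import paddle
from disco_diffusion_cnclip_vitb16.resize_right.resize_right import resize

# NCHW image batch; for Paddle inputs, scale factors and out_shape apply to the last dims.
x = paddle.rand([1, 3, 256, 256])

# Downscale by 0.5 in both spatial dims; antialiasing is applied for scale factors < 1.
y = resize(x, scale_factors=0.5)
print(y.shape)  # [1, 3, 128, 128]

# Alternatively, give the target spatial size directly; leading dims are kept as-is.
z = resize(x, out_shape=(224, 224))
print(z.shape)  # [1, 3, 224, 224]
```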
diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/__init__.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/__init__.py new file mode 100755 index 000000000..39fc908dc --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/__init__.py @@ -0,0 +1,156 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/__init__.py +''' +import os +import warnings + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + +__all__ = ['create'] + +import sys + +__resources_path__ = os.path.join( + os.path.dirname(sys.modules.get(__package__).__file__ if __package__ in sys.modules else __file__), + 'resources', +) + +import gc + +# check if GPU is available +import paddle + +# download and load models, this will take some time on the first load + +from .helper import load_all_models, load_diffusion_model, load_clip_models + +model_config, secondary_model = load_all_models('512x512_diffusion_uncond_finetune_008100', use_secondary_model=True) + +from typing import TYPE_CHECKING, overload, List, Optional + +if TYPE_CHECKING: + from docarray import DocumentArray, Document + +_clip_models_cache = {} + +# begin_create_overload + + +@overload +def create(text_prompts: Optional[List[str]] = [ + 'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.', + 'yellow color scheme', +], + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 10, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 150, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_model: Optional[str] = '512x512_diffusion_uncond_finetune_008100', + use_secondary_model: Optional[bool] = True, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 4, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + clip_models: Optional[list] = ['ViTB32', 'ViTB16', 'RN50'], + output_dir: Optional[str] = 'discoart_output') -> 'DocumentArray': + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. 
Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. 
Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_model: Diffusion_model of choice. + :param use_secondary_model: Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. 
This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param clip_models: CLIP Model selectors. ViTB32, ViTB16, ViTL14, RN101, RN50, RN50x4, RN50x16, RN50x64.These various CLIP models are available for you to use during image generation. 
Models have different styles or ‘flavors,’ so look around. You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.The rough order of speed/mem usage is (smallest/fastest to largest/slowest):VitB32RN50RN101VitB16RN50x4RN50x16RN50x64ViTL14For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +# end_create_overload + + +@overload +def create(init_document: 'Document') -> 'DocumentArray': + """ + Create an artwork using a DocArray ``Document`` object as initial state. + :param init_document: its ``.tags`` will be used as parameters, ``.uri`` (if present) will be used as init image. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +def create(**kwargs) -> 'DocumentArray': + from .config import load_config + from .runner import do_run + + if 'init_document' in kwargs: + d = kwargs['init_document'] + _kwargs = d.tags + if not _kwargs: + warnings.warn('init_document has no .tags, fallback to default config') + if d.uri: + _kwargs['init_image'] = kwargs['init_document'].uri + else: + warnings.warn('init_document has no .uri, fallback to no init image') + kwargs.pop('init_document') + if kwargs: + warnings.warn('init_document has .tags and .uri, but kwargs are also present, will override .tags') + _kwargs.update(kwargs) + _args = load_config(user_config=_kwargs) + else: + _args = load_config(user_config=kwargs) + + model, diffusion = load_diffusion_model(model_config, _args.diffusion_model, steps=_args.steps) + + clip_models = load_clip_models(enabled=_args.clip_models, clip_models=_clip_models_cache) + + gc.collect() + paddle.device.cuda.empty_cache() + try: + return do_run(_args, (model, diffusion, clip_models, secondary_model)) + except KeyboardInterrupt: + pass diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/config.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/config.py new file mode 100755 index 000000000..0cbc71e6f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/config.py @@ -0,0 +1,77 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/config.py +''' +import copy +import random +import warnings +from types import SimpleNamespace +from typing import Dict + +import yaml +from yaml import Loader + +from . 
import __resources_path__ + +with open(f'{__resources_path__}/default.yml') as ymlfile: + default_args = yaml.load(ymlfile, Loader=Loader) + + +def load_config(user_config: Dict, ): + cfg = copy.deepcopy(default_args) + + if user_config: + cfg.update(**user_config) + + for k in user_config.keys(): + if k not in cfg: + warnings.warn(f'unknown argument {k}, ignored') + + for k, v in cfg.items(): + if k in ('batch_size', 'display_rate', 'seed', 'skip_steps', 'steps', 'n_batches', + 'cutn_batches') and isinstance(v, float): + cfg[k] = int(v) + if k == 'width_height': + cfg[k] = [int(vv) for vv in v] + + cfg.update(**{ + 'seed': cfg['seed'] or random.randint(0, 2**32), + }) + + if cfg['batch_name']: + da_name = f'{__package__}-{cfg["batch_name"]}-{cfg["seed"]}' + else: + da_name = f'{__package__}-{cfg["seed"]}' + warnings.warn('you did not set `batch_name`, set it to have unique session ID') + + cfg.update(**{'name_docarray': da_name}) + + print_args_table(cfg) + + return SimpleNamespace(**cfg) + + +def print_args_table(cfg): + from rich.table import Table + from rich import box + from rich.console import Console + + console = Console() + + param_str = Table( + title=cfg['name_docarray'], + box=box.ROUNDED, + highlight=True, + title_justify='left', + ) + param_str.add_column('Argument', justify='right') + param_str.add_column('Value', justify='left') + + for k, v in sorted(cfg.items()): + value = str(v) + + if not default_args.get(k, None) == v: + value = f'[b]{value}[/]' + + param_str.add_row(k, value) + + console.print(param_str) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/helper.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/helper.py new file mode 100755 index 000000000..b291b9b1d --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/helper.py @@ -0,0 +1,138 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. 
+https://github.com/jina-ai/discoart/blob/main/discoart/helper.py +''' +import hashlib +import logging +import os +import subprocess +import sys +from os.path import expanduser +from pathlib import Path +from typing import Any +from typing import Dict +from typing import List + +import paddle + + +def _get_logger(): + logger = logging.getLogger(__package__) + _log_level = os.environ.get('DISCOART_LOG_LEVEL', 'INFO') + logger.setLevel(_log_level) + ch = logging.StreamHandler() + ch.setLevel(_log_level) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + logger.addHandler(ch) + return logger + + +logger = _get_logger() + + +def load_clip_models(enabled: List[str], clip_models: Dict[str, Any] = {}): + + import disco_diffusion_cnclip_vitb16.cn_clip as cn_clip + from disco_diffusion_cnclip_vitb16.cn_clip.clip.utils import create_model + + # load enabled models + for k in enabled: + if k not in clip_models: + clip_models[k] = create_model(name=k) + clip_models[k].eval() + for parameter in clip_models[k].parameters(): + parameter.stop_gradient = True + + # disable not enabled models to save memory + for k in clip_models: + if k not in enabled: + clip_models.pop(k) + + return list(clip_models.values()) + + +def load_all_models(diffusion_model, use_secondary_model): + from .model.script_util import ( + model_and_diffusion_defaults, ) + + model_config = model_and_diffusion_defaults() + + if diffusion_model == '512x512_diffusion_uncond_finetune_008100': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later. + 'image_size': 512, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + elif diffusion_model == '256x256_diffusion_uncond': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later. 
+ 'image_size': 256, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + + secondary_model = None + if use_secondary_model: + from .model.sec_diff import SecondaryDiffusionImageNet2 + secondary_model = SecondaryDiffusionImageNet2() + model_dict = paddle.load( + os.path.join(os.path.dirname(__file__), 'pre_trained', 'secondary_model_imagenet_2.pdparams')) + secondary_model.set_state_dict(model_dict) + secondary_model.eval() + for parameter in secondary_model.parameters(): + parameter.stop_gradient = True + + return model_config, secondary_model + + +def load_diffusion_model(model_config, diffusion_model, steps): + from .model.script_util import ( + create_model_and_diffusion, ) + + timestep_respacing = f'ddim{steps}' + diffusion_steps = (1000 // steps) * steps if steps < 1000 else steps + model_config.update({ + 'timestep_respacing': timestep_respacing, + 'diffusion_steps': diffusion_steps, + }) + + model, diffusion = create_model_and_diffusion(**model_config) + model.set_state_dict( + paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', f'{diffusion_model}.pdparams'))) + model.eval() + for name, param in model.named_parameters(): + param.stop_gradient = True + + return model, diffusion + + +def parse_prompt(prompt): + if prompt.startswith('http://') or prompt.startswith('https://'): + vals = prompt.rsplit(':', 2) + vals = [vals[0] + ':' + vals[1], *vals[2:]] + else: + vals = prompt.rsplit(':', 1) + vals = vals + ['', '1'][len(vals):] + return vals[0], float(vals[1]) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/__init__.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/__init__.py new file mode 100755 index 000000000..466800666 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/__init__.py @@ -0,0 +1,3 @@ +""" +Codebase for "Improved Denoising Diffusion Probabilistic Models" implemented by Paddle. +""" diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/gaussian_diffusion.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/gaussian_diffusion.py new file mode 100755 index 000000000..86cd2c650 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/gaussian_diffusion.py @@ -0,0 +1,1214 @@ +""" +Diffusion model implemented by Paddle. +This code is rewritten based on Pytorch version of of Ho et al's diffusion models: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py +""" +import enum +import math + +import numpy as np +import paddle + +from .losses import discretized_gaussian_log_likelihood +from .losses import normal_kl +from .nn import mean_flat + + +def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): + """ + Get a pre-defined beta schedule for the given name. + + The beta schedule library consists of beta schedules which remain similar + in the limit of num_diffusion_timesteps. + Beta schedules may be added, but should not be removed or changed once + they are committed to maintain backwards compatibility. + """ + if schedule_name == "linear": + # Linear schedule from Ho et al, extended to work for any number of + # diffusion steps. 
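+ # For the original 1000-step setting this yields betas from 1e-4 to 0.02; for other step counts + # both endpoints are scaled by 1000 / num_diffusion_timesteps so the schedule keeps the same + # shape in the continuum limit.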
+ scale = 1000 / num_diffusion_timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + elif schedule_name == "cosine": + return betas_for_alpha_bar( + num_diffusion_timesteps, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2)**2, + ) + else: + raise NotImplementedError(f"unknown beta schedule: {schedule_name}") + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +class ModelMeanType(enum.Enum): + """ + Which type of output the model predicts. + """ + + PREVIOUS_X = enum.auto() # the model predicts x_{t-1} + START_X = enum.auto() # the model predicts x_0 + EPSILON = enum.auto() # the model predicts epsilon + + +class ModelVarType(enum.Enum): + """ + What is used as the model's output variance. + + The LEARNED_RANGE option has been added to allow the model to predict + values between FIXED_SMALL and FIXED_LARGE, making its job easier. + """ + + LEARNED = enum.auto() + FIXED_SMALL = enum.auto() + FIXED_LARGE = enum.auto() + LEARNED_RANGE = enum.auto() + + +class LossType(enum.Enum): + MSE = enum.auto() # use raw MSE loss (and KL when learning variances) + RESCALED_MSE = (enum.auto()) # use raw MSE loss (with RESCALED_KL when learning variances) + KL = enum.auto() # use the variational lower-bound + RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB + + def is_vb(self): + return self == LossType.KL or self == LossType.RESCALED_KL + + +class GaussianDiffusion: + """ + Utilities for training and sampling diffusion models. + + Ported directly from here, and then adapted over time to further experimentation. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 + + :param betas: a 1-D numpy array of betas for each diffusion timestep, + starting at T and going to 1. + :param model_mean_type: a ModelMeanType determining what the model outputs. + :param model_var_type: a ModelVarType determining how variance is output. + :param loss_type: a LossType determining the loss function to use. + :param rescale_timesteps: if True, pass floating point timesteps into the + model so that they are always scaled like in the + original paper (0 to 1000). + """ + + def __init__( + self, + *, + betas, + model_mean_type, + model_var_type, + loss_type, + rescale_timesteps=False, + ): + self.model_mean_type = model_mean_type + self.model_var_type = model_var_type + self.loss_type = loss_type + self.rescale_timesteps = rescale_timesteps + + # Use float64 for accuracy. 
+ betas = np.array(betas, dtype=np.float64) + self.betas = betas + assert len(betas.shape) == 1, "betas must be 1-D" + assert (betas > 0).all() and (betas <= 1).all() + + self.num_timesteps = int(betas.shape[0]) + + alphas = 1.0 - betas + self.alphas_cumprod = np.cumprod(alphas, axis=0) + self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) + self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) + assert self.alphas_cumprod_prev.shape == (self.num_timesteps, ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) + self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) + self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) + self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + self.posterior_variance = (betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + # log calculation clipped because the posterior variance is 0 at the + # beginning of the diffusion chain. + self.posterior_log_variance_clipped = np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:])) + self.posterior_mean_coef1 = (betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)) + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def q_sample(self, x_start, t, noise=None): + """ + Diffuse the data for a given number of diffusion steps. + + In other words, sample from q(x_t | x_0). + + :param x_start: the initial data batch. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :param noise: if specified, the split-out normal noise. + :return: A noisy version of x_start. 
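+ + Concretely, the sample is sqrt(alphas_cumprod[t]) * x_start + sqrt(1 - alphas_cumprod[t]) * noise.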
+ """ + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + assert noise.shape == x_start.shape + return (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def q_posterior_mean_variance(self, x_start, x_t, t): + """ + Compute the mean and variance of the diffusion posterior: + + q(x_{t-1} | x_t, x_0) + + """ + assert x_start.shape == x_t.shape + posterior_mean = (_extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t) + posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + assert (posterior_mean.shape[0] == posterior_variance.shape[0] == posterior_log_variance_clipped.shape[0] == + x_start.shape[0]) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None): + """ + Apply the model to get p(x_{t-1} | x_t), as well as a prediction of + the initial x, x_0. + + :param model: the model, which takes a signal and a batch of timesteps + as input. + :param x: the [N x C x ...] tensor at time t. + :param t: a 1-D Tensor of timesteps. + :param clip_denoised: if True, clip the denoised signal into [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. Applies before + clip_denoised. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict with the following keys: + - 'mean': the model mean output. + - 'variance': the model variance output. + - 'log_variance': the log of 'variance'. + - 'pred_xstart': the prediction for x_0. + """ + if model_kwargs is None: + model_kwargs = {} + + B, C = x.shape[:2] + assert t.shape == [B] + model_output = model(x, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]: + assert model_output.shape == [B, C * 2, *x.shape[2:]] + model_output, model_var_values = paddle.split(model_output, 2, axis=1) + if self.model_var_type == ModelVarType.LEARNED: + model_log_variance = model_var_values + model_variance = paddle.exp(model_log_variance) + else: + min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape) + max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) + # The model_var_values is [-1, 1] for [min_var, max_var]. + frac = (model_var_values + 1) / 2 + model_log_variance = frac * max_log + (1 - frac) * min_log + model_variance = paddle.exp(model_log_variance) + else: + model_variance, model_log_variance = { + # for fixedlarge, we set the initial (log-)variance like so + # to get a better decoder log likelihood. 
+ ModelVarType.FIXED_LARGE: ( + np.append(self.posterior_variance[1], self.betas[1:]), + np.log(np.append(self.posterior_variance[1], self.betas[1:])), + ), + ModelVarType.FIXED_SMALL: ( + self.posterior_variance, + self.posterior_log_variance_clipped, + ), + }[self.model_var_type] + model_variance = _extract_into_tensor(model_variance, t, x.shape) + model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape) + + def process_xstart(x): + if denoised_fn is not None: + x = denoised_fn(x) + if clip_denoised: + return x.clamp(-1, 1) + return x + + if self.model_mean_type == ModelMeanType.PREVIOUS_X: + pred_xstart = process_xstart(self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)) + model_mean = model_output + elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: + if self.model_mean_type == ModelMeanType.START_X: + pred_xstart = process_xstart(model_output) + else: + pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)) + model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t) + else: + raise NotImplementedError(self.model_mean_type) + + assert (model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape) + return { + "mean": model_mean, + "variance": model_variance, + "log_variance": model_log_variance, + "pred_xstart": pred_xstart, + } + + def _predict_xstart_from_eps(self, x_t, t, eps): + assert x_t.shape == eps.shape + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps) + + def _predict_xstart_from_xprev(self, x_t, t, xprev): + assert x_t.shape == xprev.shape + return ( # (xprev - coef2*x_t) / coef1 + _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev - + _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t) + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + pred_xstart) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _scale_timesteps(self, t): + if self.rescale_timesteps: + return paddle.cast((t), 'float32') * (1000.0 / self.num_timesteps) + return t + + def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). + """ + gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_mean_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
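+ + Concretely, new_mean = mean + variance * gradient, where gradient = cond_fn(x, t, p_mean_var, **model_kwargs).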
+ """ + gradient = cond_fn(x, t, p_mean_var, **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, self._scale_timesteps(t), **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def condition_score_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, p_mean_var, **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def p_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. + :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"]} + + def p_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. 
+ :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean_with_grad(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"].detach()} + + def p_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model. + + :param model: the model module. + :param shape: the shape of the samples, (N, C, H, W). + :param noise: if specified, the noise from the encoder to sample. + Should be of the same shape as `shape`. + :param clip_denoised: if True, clip x_start predictions to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param device: if specified, the device to create the samples on. + If not specified, use a model parameter's device. + :param progress: if True, show a tqdm progress bar. + :return: a non-differentiable batch of samples. + """ + final = None + for sample in self.p_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def p_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model and yield intermediate samples from + each timestep of diffusion. + + Arguments are the same as p_sample_loop(). + Returns a generator over dicts, where each dict is the return value of + p_sample(). 
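+ Callers that only need the final sample can iterate the generator and keep the last dict; + p_sample_loop() does exactly that and returns its "sample" entry.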
+ """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + sample_fn = self.p_sample_with_grad if cond_fn_with_grad else self.p_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + ) + yield out + img = out["sample"] + + def ddim_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"]} + + def ddim_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + out["pred_xstart"] = out["pred_xstart"].detach() + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"].detach()} + + def ddim_reverse_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t+1} from the model using DDIM reverse ODE. + """ + assert eta == 0.0, "Reverse ODE only for deterministic path" + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x - + out["pred_xstart"]) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape) + alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape) + + # Equation 12. reversed + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_next) + paddle.sqrt(1 - alpha_bar_next) * eps) + + return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]} + + def ddim_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model using DDIM. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.ddim_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + eta=eta, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def ddim_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Use DDIM to sample from the model and yield intermediate samples from + each timestep of DDIM. + + Same usage as p_sample_loop_progressive(). 
+ """ + # if device is None: + # device = next(model.parameters()).device + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0]) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint( + low=0, + high=model.num_classes, + shape=model_kwargs['y'].shape, + ) + sample_fn = self.ddim_sample_with_grad if cond_fn_with_grad else self.ddim_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + eta=eta, + ) + yield out + img = out["sample"] + + def plms_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + cond_fn_with_grad=False, + order=2, + old_out=None, + ): + """ + Sample x_{t-1} from the model using Pseudo Linear Multistep. + + Same usage as p_sample(). + """ + if not int(order) or not 1 <= order <= 4: + raise ValueError('order is invalid (should be int from 1-4).') + + def get_model_output(x, t): + with paddle.set_grad_enabled(cond_fn_with_grad and cond_fn is not None): + x = x.detach().requires_grad_() if cond_fn_with_grad else x + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + if cond_fn_with_grad: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + x = x.detach() + else: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + return eps, out, out_orig + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + eps, out, out_orig = get_model_output(x, t) + + if order > 1 and old_out is None: + # Pseudo Improved Euler + old_eps = [eps] + mean_pred = out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps + eps_2, _, _ = get_model_output(mean_pred, t - 1) + eps_prime = (eps + eps_2) / 2 + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + else: + # Pseudo Linear Multistep (Adams-Bashforth) + old_eps = old_out["old_eps"] + old_eps.append(eps) + cur_order = min(order, len(old_eps)) + if cur_order == 1: + eps_prime = old_eps[-1] + elif cur_order == 2: + eps_prime = (3 * old_eps[-1] - old_eps[-2]) / 2 + elif cur_order == 3: + eps_prime = (23 * old_eps[-1] - 16 * old_eps[-2] + 5 * old_eps[-3]) / 12 + elif cur_order == 4: + eps_prime = (55 * old_eps[-1] - 59 * old_eps[-2] + 37 * old_eps[-3] - 9 * old_eps[-4]) / 24 + else: + raise RuntimeError('cur_order is invalid.') + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + + if len(old_eps) >= order: + old_eps.pop(0) + + nonzero_mask = paddle.cast((t != 0), 'float32').reshape([-1, *([1] * (len(x.shape) - 1))]) + sample = mean_pred * nonzero_mask + out["pred_xstart"] * (1 - nonzero_mask) + + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"], "old_eps": old_eps} + + def plms_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Generate samples from the model using Pseudo Linear Multistep. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.plms_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + ): + final = sample + return final["sample"] + + def plms_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Use PLMS to sample from the model and yield intermediate samples from each + timestep of PLMS. + + Same usage as p_sample_loop_progressive(). + """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. 
+ from tqdm.auto import tqdm + + indices = tqdm(indices) + + old_out = None + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + out = self.plms_sample( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + old_out=old_out, + ) + yield out + old_out = out + img = out["sample"] + + def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None): + """ + Get a term for the variational lower-bound. + + The resulting units are bits (rather than nats, as one might expect). + This allows for comparison to other papers. + + :return: a dict with the following keys: + - 'output': a shape [N] tensor of NLLs or KLs. + - 'pred_xstart': the x_0 predictions. + """ + true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t) + out = self.p_mean_variance(model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs) + kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]) + kl = mean_flat(kl) / np.log(2.0) + + decoder_nll = -discretized_gaussian_log_likelihood( + x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]) + assert decoder_nll.shape == x_start.shape + decoder_nll = mean_flat(decoder_nll) / np.log(2.0) + + # At the first timestep return the decoder NLL, + # otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)) + output = paddle.where((t == 0), decoder_nll, kl) + return {"output": output, "pred_xstart": out["pred_xstart"]} + + def training_losses(self, model, x_start, t, model_kwargs=None, noise=None): + """ + Compute training losses for a single timestep. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param t: a batch of timestep indices. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param noise: if specified, the specific Gaussian noise to try to remove. + :return: a dict with the key "loss" containing a tensor of shape [N]. + Some mean or variance settings may also have other keys. + """ + if model_kwargs is None: + model_kwargs = {} + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start, t, noise=noise) + + terms = {} + + if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL: + terms["loss"] = self._vb_terms_bpd( + model=model, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + model_kwargs=model_kwargs, + )["output"] + if self.loss_type == LossType.RESCALED_KL: + terms["loss"] *= self.num_timesteps + elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: + model_output = model(x_t, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ + ModelVarType.LEARNED, + ModelVarType.LEARNED_RANGE, + ]: + B, C = x_t.shape[:2] + assert model_output.shape == (B, C * 2, *x_t.shape[2:]) + model_output, model_var_values = paddle.split(model_output, 2, dim=1) + # Learn the variance using the variational bound, but don't let + # it affect our mean prediction. 
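+                # Detaching model_output freezes the mean prediction inside the VB term below,
+                # so the variational-bound loss only updates model_var_values (the variance head).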
+ frozen_out = paddle.concat([model_output.detach(), model_var_values], axis=1) + terms["vb"] = self._vb_terms_bpd( + model=lambda *args, r=frozen_out: r, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + )["output"] + if self.loss_type == LossType.RESCALED_MSE: + # Divide by 1000 for equivalence with initial implementation. + # Without a factor of 1/1000, the VB term hurts the MSE term. + terms["vb"] *= self.num_timesteps / 1000.0 + + target = { + ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0], + ModelMeanType.START_X: x_start, + ModelMeanType.EPSILON: noise, + }[self.model_mean_type] + assert model_output.shape == target.shape == x_start.shape + terms["mse"] = mean_flat((target - model_output)**2) + if "vb" in terms: + terms["loss"] = terms["mse"] + terms["vb"] + else: + terms["loss"] = terms["mse"] + else: + raise NotImplementedError(self.loss_type) + + return terms + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + + This term can't be optimized, as it only depends on the encoder. + + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = paddle.to_tensor([self.num_timesteps - 1] * batch_size, place=x_start.place) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None): + """ + Compute the entire variational lower-bound, measured in bits-per-dim, + as well as other related quantities. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param clip_denoised: if True, clip denoised samples. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + + :return: a dict containing the following keys: + - total_bpd: the total variational lower-bound, per batch element. + - prior_bpd: the prior term in the lower-bound. + - vb: an [N x T] tensor of terms in the lower-bound. + - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep. + - mse: an [N x T] tensor of epsilon MSEs for each timestep. 
+ """ + device = x_start.place + batch_size = x_start.shape[0] + + vb = [] + xstart_mse = [] + mse = [] + for t in list(range(self.num_timesteps))[::-1]: + t_batch = paddle.to_tensor([t] * batch_size, place=device) + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise) + # Calculate VLB term at the current timestep + # with paddle.no_grad(): + out = self._vb_terms_bpd( + model, + x_start=x_start, + x_t=x_t, + t=t_batch, + clip_denoised=clip_denoised, + model_kwargs=model_kwargs, + ) + vb.append(out["output"]) + xstart_mse.append(mean_flat((out["pred_xstart"] - x_start)**2)) + eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"]) + mse.append(mean_flat((eps - noise)**2)) + + vb = paddle.stack(vb, axis=1) + xstart_mse = paddle.stack(xstart_mse, axis=1) + mse = paddle.stack(mse, axis=1) + + prior_bpd = self._prior_bpd(x_start) + total_bpd = vb.sum(axis=1) + prior_bpd + return { + "total_bpd": total_bpd, + "prior_bpd": prior_bpd, + "vb": vb, + "xstart_mse": xstart_mse, + "mse": mse, + } + + +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + res = paddle.to_tensor(arr, place=timesteps.place)[timesteps] + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/losses.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/losses.py new file mode 100755 index 000000000..5c3970de5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/losses.py @@ -0,0 +1,86 @@ +""" +Helpers for various likelihood-based losses implemented by Paddle. These are ported from the original +Ho et al. diffusion models codebase: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py +""" +import numpy as np +import paddle +import paddle.nn.functional as F + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + Compute the KL divergence between two gaussians. + + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, paddle.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for th.exp(). + logvar1, logvar2 = [x if isinstance(x, paddle.Tensor) else paddle.to_tensor(x) for x in (logvar1, logvar2)] + + return 0.5 * (-1.0 + logvar2 - logvar1 + paddle.exp(logvar1 - logvar2) + + ((mean1 - mean2)**2) * paddle.exp(-logvar2)) + + +def approx_standard_normal_cdf(x): + """ + A fast approximation of the cumulative distribution function of the + standard normal. 
+ """ + return 0.5 * (1.0 + paddle.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def discretized_gaussian_log_likelihood(x, *, means, log_scales): + """ + Compute the log-likelihood of a Gaussian distribution discretizing to a + given image. + + :param x: the target images. It is assumed that this was uint8 values, + rescaled to the range [-1, 1]. + :param means: the Gaussian mean Tensor. + :param log_scales: the Gaussian log stddev Tensor. + :return: a tensor like x of log probabilities (in nats). + """ + assert x.shape == means.shape == log_scales.shape + centered_x = x - means + inv_stdv = paddle.exp(-log_scales) + plus_in = inv_stdv * (centered_x + 1.0 / 255.0) + cdf_plus = approx_standard_normal_cdf(plus_in) + min_in = inv_stdv * (centered_x - 1.0 / 255.0) + cdf_min = approx_standard_normal_cdf(min_in) + log_cdf_plus = paddle.log(cdf_plus.clip(min=1e-12)) + log_one_minus_cdf_min = paddle.log((1.0 - cdf_min).clip(min=1e-12)) + cdf_delta = cdf_plus - cdf_min + log_probs = paddle.where( + x < -0.999, + log_cdf_plus, + paddle.where(x > 0.999, log_one_minus_cdf_min, paddle.log(cdf_delta.clip(min=1e-12))), + ) + assert log_probs.shape == x.shape + return log_probs + + +def spherical_dist_loss(x, y): + x = F.normalize(x, axis=-1) + y = F.normalize(y, axis=-1) + return (x - y).norm(axis=-1).divide(paddle.to_tensor(2.0)).asin().pow(2).multiply(paddle.to_tensor(2.0)) + + +def tv_loss(input): + """L2 total variation loss, as in Mahendran et al.""" + input = F.pad(input, (0, 1, 0, 1), 'replicate') + x_diff = input[..., :-1, 1:] - input[..., :-1, :-1] + y_diff = input[..., 1:, :-1] - input[..., :-1, :-1] + return (x_diff**2 + y_diff**2).mean([1, 2, 3]) + + +def range_loss(input): + return (input - input.clip(-1, 1)).pow(2).mean([1, 2, 3]) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/make_cutouts.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/make_cutouts.py new file mode 100755 index 000000000..cba46edc9 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/make_cutouts.py @@ -0,0 +1,177 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/nn/make_cutouts.py +''' +import math + +import paddle +import paddle.nn as nn +from disco_diffusion_cnclip_vitb16.resize_right.resize_right import resize +from paddle.nn import functional as F + +from . 
import transforms as T + +skip_augs = False # @param{type: 'boolean'} + + +def sinc(x): + return paddle.where(x != 0, paddle.sin(math.pi * x) / (math.pi * x), x.new_ones([])) + + +def lanczos(x, a): + cond = paddle.logical_and(-a < x, x < a) + out = paddle.where(cond, sinc(x) * sinc(x / a), x.new_zeros([])) + return out / out.sum() + + +def ramp(ratio, width): + n = math.ceil(width / ratio + 1) + out = paddle.empty([n]) + cur = 0 + for i in range(out.shape[0]): + out[i] = cur + cur += ratio + return paddle.concat([-out[1:].flip([0]), out])[1:-1] + + +class MakeCutouts(nn.Layer): + + def __init__(self, cut_size, cutn, skip_augs=False): + super().__init__() + self.cut_size = cut_size + self.cutn = cutn + self.skip_augs = skip_augs + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine(degrees=15, translate=(0.1, 0.1)), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomPerspective(distortion_scale=0.4, p=0.7), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.15), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + input = T.Pad(input.shape[2] // 4, fill=0)(input) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + + cutouts = [] + for ch in range(self.cutn): + if ch > self.cutn - self.cutn // 4: + cutout = input.clone() + else: + size = int(max_size * + paddle.zeros(1, ).normal_(mean=0.8, std=0.3).clip(float(self.cut_size / max_size), 1.0)) + offsetx = paddle.randint(0, abs(sideX - size + 1), ()) + offsety = paddle.randint(0, abs(sideY - size + 1), ()) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + + if not self.skip_augs: + cutout = self.augs(cutout) + cutouts.append(resample(cutout, (self.cut_size, self.cut_size))) + del cutout + + cutouts = paddle.concat(cutouts, axis=0) + return cutouts + + +class MakeCutoutsDango(nn.Layer): + + def __init__(self, cut_size, Overview=4, InnerCrop=0, IC_Size_Pow=0.5, IC_Grey_P=0.2): + super().__init__() + self.cut_size = cut_size + self.Overview = Overview + self.InnerCrop = InnerCrop + self.IC_Size_Pow = IC_Size_Pow + self.IC_Grey_P = IC_Grey_P + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine( + degrees=10, + translate=(0.05, 0.05), + interpolation=T.InterpolationMode.BILINEAR, + ), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.1), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + cutouts = [] + gray = T.Grayscale(3) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + output_shape = [1, 3, self.cut_size, self.cut_size] + pad_input = F.pad( + input, + ( + (sideY - max_size) // 2, + (sideY - max_size) // 2, + (sideX - max_size) // 2, + (sideX - max_size) // 2, + ), + **padargs, + ) + cutout = resize(pad_input, out_shape=output_shape) + + if self.Overview > 0: + if self.Overview <= 4: + if self.Overview >= 1: + cutouts.append(cutout) + if self.Overview >= 2: + cutouts.append(gray(cutout)) + if self.Overview >= 3: + cutouts.append(cutout[:, :, :, ::-1]) + if self.Overview == 4: + cutouts.append(gray(cutout[:, :, :, ::-1])) + else: + cutout = resize(pad_input, out_shape=output_shape) + 
for _ in range(self.Overview): + cutouts.append(cutout) + + if self.InnerCrop > 0: + for i in range(self.InnerCrop): + size = int(paddle.rand([1])**self.IC_Size_Pow * (max_size - min_size) + min_size) + offsetx = paddle.randint(0, sideX - size + 1) + offsety = paddle.randint(0, sideY - size + 1) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + if i <= int(self.IC_Grey_P * self.InnerCrop): + cutout = gray(cutout) + cutout = resize(cutout, out_shape=output_shape) + cutouts.append(cutout) + + cutouts = paddle.concat(cutouts) + if skip_augs is not True: + cutouts = self.augs(cutouts) + return cutouts + + +def resample(input, size, align_corners=True): + n, c, h, w = input.shape + dh, dw = size + + input = input.reshape([n * c, 1, h, w]) + + if dh < h: + kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype) + pad_h = (kernel_h.shape[0] - 1) // 2 + input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect') + input = F.conv2d(input, kernel_h[None, None, :, None]) + + if dw < w: + kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype) + pad_w = (kernel_w.shape[0] - 1) // 2 + input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect') + input = F.conv2d(input, kernel_w[None, None, None, :]) + + input = input.reshape([n, c, h, w]) + return F.interpolate(input, size, mode='bicubic', align_corners=align_corners) + + +padargs = {} diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/nn.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/nn.py new file mode 100755 index 000000000..d618183e2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/nn.py @@ -0,0 +1,127 @@ +""" +Various utilities for neural networks implemented by Paddle. This code is rewritten based on: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py +""" +import math + +import paddle +import paddle.nn as nn + + +class SiLU(nn.Layer): + + def forward(self, x): + return x * nn.functional.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + + def forward(self, x): + return super().forward(x) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1D(*args, **kwargs) + elif dims == 2: + return nn.Conv2D(*args, **kwargs) + elif dims == 3: + return nn.Conv3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1D(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2D(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
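+
+    Typically applied to the final layer of a block so that the block initially
+    contributes nothing to the output.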
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(axis=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = paddle.exp(-math.log(max_period) * paddle.arange(start=0, end=half, dtype=paddle.float32) / half) + args = paddle.cast(timesteps[:, None], 'float32') * freqs[None] + embedding = paddle.concat([paddle.cos(args), paddle.sin(args)], axis=-1) + if dim % 2: + embedding = paddle.concat([embedding, paddle.zeros_like(embedding[:, :1])], axis=-1) + return embedding + + +def checkpoint(func, inputs, params, flag): + """ + This function is disabled. And now just forward. + """ + return func(*inputs) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/perlin_noises.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/perlin_noises.py new file mode 100755 index 000000000..6dacb331b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/perlin_noises.py @@ -0,0 +1,78 @@ +''' +Perlin noise implementation by Paddle. 
+This code is rewritten based on: +https://github.com/jina-ai/discoart/blob/main/discoart/nn/perlin_noises.py +''' +import numpy as np +import paddle +import paddle.vision.transforms as TF +from PIL import Image +from PIL import ImageOps + + +def interp(t): + return 3 * t**2 - 2 * t**3 + + +def perlin(width, height, scale=10): + gx, gy = paddle.randn([2, width + 1, height + 1, 1, 1]) + xs = paddle.linspace(0, 1, scale + 1)[:-1, None] + ys = paddle.linspace(0, 1, scale + 1)[None, :-1] + wx = 1 - interp(xs) + wy = 1 - interp(ys) + dots = 0 + dots += wx * wy * (gx[:-1, :-1] * xs + gy[:-1, :-1] * ys) + dots += (1 - wx) * wy * (-gx[1:, :-1] * (1 - xs) + gy[1:, :-1] * ys) + dots += wx * (1 - wy) * (gx[:-1, 1:] * xs - gy[:-1, 1:] * (1 - ys)) + dots += (1 - wx) * (1 - wy) * (-gx[1:, 1:] * (1 - xs) - gy[1:, 1:] * (1 - ys)) + return dots.transpose([0, 2, 1, 3]).reshape([width * scale, height * scale]) + + +def perlin_ms(octaves, width, height, grayscale): + out_array = [0.5] if grayscale else [0.5, 0.5, 0.5] + # out_array = [0.0] if grayscale else [0.0, 0.0, 0.0] + for i in range(1 if grayscale else 3): + scale = 2**len(octaves) + oct_width = width + oct_height = height + for oct in octaves: + p = perlin(oct_width, oct_height, scale) + out_array[i] += p * oct + scale //= 2 + oct_width *= 2 + oct_height *= 2 + return paddle.concat(out_array) + + +def create_perlin_noise(octaves, width, height, grayscale, side_y, side_x): + out = perlin_ms(octaves, width, height, grayscale) + if grayscale: + out = TF.resize(size=(side_y, side_x), img=out.numpy()) + out = np.uint8(out) + out = Image.fromarray(out).convert('RGB') + else: + out = out.reshape([-1, 3, out.shape[0] // 3, out.shape[1]]) + out = out.squeeze().transpose([1, 2, 0]).numpy() + out = TF.resize(size=(side_y, side_x), img=out) + out = out.clip(0, 1) * 255 + out = np.uint8(out) + out = Image.fromarray(out) + + out = ImageOps.autocontrast(out) + return out + + +def regen_perlin(perlin_mode, side_y, side_x, batch_size): + if perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + + init = (TF.to_tensor(init).add(TF.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + return init.expand([batch_size, -1, -1, -1]) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/respace.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/respace.py new file mode 100755 index 000000000..c001c70d0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/respace.py @@ -0,0 +1,123 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/respace.py +''' +import numpy as np +import paddle + +from .gaussian_diffusion import GaussianDiffusion + + +def space_timesteps(num_timesteps, section_counts): + """ + Create a list of timesteps to use from an original diffusion process, + given the number of 
timesteps we want to take from equally-sized portions + of the original process. + + For example, if there's 300 timesteps and the section counts are [10,15,20] + then the first 100 timesteps are strided to be 10 timesteps, the second 100 + are strided to be 15 timesteps, and the final 100 are strided to be 20. + + If the stride is a string starting with "ddim", then the fixed striding + from the DDIM paper is used, and only one section is allowed. + + :param num_timesteps: the number of diffusion steps in the original + process to divide up. + :param section_counts: either a list of numbers, or a string containing + comma-separated numbers, indicating the step count + per section. As a special case, use "ddimN" where N + is a number of steps to use the striding from the + DDIM paper. + :return: a set of diffusion steps from the original process to use. + """ + if isinstance(section_counts, str): + if section_counts.startswith("ddim"): + desired_count = int(section_counts[len("ddim"):]) + for i in range(1, num_timesteps): + if len(range(0, num_timesteps, i)) == desired_count: + return set(range(0, num_timesteps, i)) + raise ValueError(f"cannot create exactly {num_timesteps} steps with an integer stride") + section_counts = [int(x) for x in section_counts.split(",")] + size_per = num_timesteps // len(section_counts) + extra = num_timesteps % len(section_counts) + start_idx = 0 + all_steps = [] + for i, section_count in enumerate(section_counts): + size = size_per + (1 if i < extra else 0) + if size < section_count: + raise ValueError(f"cannot divide section of {size} steps into {section_count}") + if section_count <= 1: + frac_stride = 1 + else: + frac_stride = (size - 1) / (section_count - 1) + cur_idx = 0.0 + taken_steps = [] + for _ in range(section_count): + taken_steps.append(start_idx + round(cur_idx)) + cur_idx += frac_stride + all_steps += taken_steps + start_idx += size + return set(all_steps) + + +class SpacedDiffusion(GaussianDiffusion): + """ + A diffusion process which can skip steps in a base diffusion process. + + :param use_timesteps: a collection (sequence or set) of timesteps from the + original diffusion process to retain. + :param kwargs: the kwargs to create the base diffusion process. 
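+
+    For example, use_timesteps=space_timesteps(1000, "ddim50") retains 50 evenly strided
+    steps of a 1000-step base process and recomputes the betas for the retained steps.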
+ """ + + def __init__(self, use_timesteps, **kwargs): + self.use_timesteps = set(use_timesteps) + self.timestep_map = [] + self.original_num_steps = len(kwargs["betas"]) + + base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa + last_alpha_cumprod = 1.0 + new_betas = [] + for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): + if i in self.use_timesteps: + new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) + last_alpha_cumprod = alpha_cumprod + self.timestep_map.append(i) + kwargs["betas"] = np.array(new_betas) + super().__init__(**kwargs) + + def p_mean_variance(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) + + def training_losses(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().training_losses(self._wrap_model(model), *args, **kwargs) + + def condition_mean(self, cond_fn, *args, **kwargs): + return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) + + def condition_score(self, cond_fn, *args, **kwargs): + return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) + + def _wrap_model(self, model): + if isinstance(model, _WrappedModel): + return model + return _WrappedModel(model, self.timestep_map, self.rescale_timesteps, self.original_num_steps) + + def _scale_timesteps(self, t): + # Scaling is done by the wrapped model. + return t + + +class _WrappedModel: + + def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): + self.model = model + self.timestep_map = timestep_map + self.rescale_timesteps = rescale_timesteps + self.original_num_steps = original_num_steps + + def __call__(self, x, ts, **kwargs): + map_tensor = paddle.to_tensor(self.timestep_map, place=ts.place, dtype=ts.dtype) + new_ts = map_tensor[ts] + if self.rescale_timesteps: + new_ts = paddle.cast(new_ts, 'float32') * (1000.0 / self.original_num_steps) + return self.model(x, new_ts, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/script_util.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/script_util.py new file mode 100755 index 000000000..d728a5430 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/script_util.py @@ -0,0 +1,201 @@ +''' +This code is based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/script_util.py +''' +import argparse +import inspect + +from . import gaussian_diffusion as gd +from .respace import space_timesteps +from .respace import SpacedDiffusion +from .unet import EncoderUNetModel +from .unet import SuperResModel +from .unet import UNetModel + +NUM_CLASSES = 1000 + + +def diffusion_defaults(): + """ + Defaults for image and classifier training. + """ + return dict( + learn_sigma=False, + diffusion_steps=1000, + noise_schedule="linear", + timestep_respacing="", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + ) + + +def model_and_diffusion_defaults(): + """ + Defaults for image training. 
+ """ + res = dict( + image_size=64, + num_channels=128, + num_res_blocks=2, + num_heads=4, + num_heads_upsample=-1, + num_head_channels=-1, + attention_resolutions="16,8", + channel_mult="", + dropout=0.0, + class_cond=False, + use_checkpoint=False, + use_scale_shift_norm=True, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, + ) + res.update(diffusion_defaults()) + return res + + +def create_model_and_diffusion( + image_size, + class_cond, + learn_sigma, + num_channels, + num_res_blocks, + channel_mult, + num_heads, + num_head_channels, + num_heads_upsample, + attention_resolutions, + dropout, + diffusion_steps, + noise_schedule, + timestep_respacing, + use_kl, + predict_xstart, + rescale_timesteps, + rescale_learned_sigmas, + use_checkpoint, + use_scale_shift_norm, + resblock_updown, + use_fp16, + use_new_attention_order, +): + model = create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult=channel_mult, + learn_sigma=learn_sigma, + class_cond=class_cond, + use_checkpoint=use_checkpoint, + attention_resolutions=attention_resolutions, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + resblock_updown=resblock_updown, + use_fp16=use_fp16, + use_new_attention_order=use_new_attention_order, + ) + diffusion = create_gaussian_diffusion( + steps=diffusion_steps, + learn_sigma=learn_sigma, + noise_schedule=noise_schedule, + use_kl=use_kl, + predict_xstart=predict_xstart, + rescale_timesteps=rescale_timesteps, + rescale_learned_sigmas=rescale_learned_sigmas, + timestep_respacing=timestep_respacing, + ) + return model, diffusion + + +def create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult="", + learn_sigma=False, + class_cond=False, + use_checkpoint=False, + attention_resolutions="16", + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + dropout=0, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, +): + if channel_mult == "": + if image_size == 512: + channel_mult = (0.5, 1, 1, 2, 2, 4, 4) + elif image_size == 256: + channel_mult = (1, 1, 2, 2, 4, 4) + elif image_size == 128: + channel_mult = (1, 1, 2, 3, 4) + elif image_size == 64: + channel_mult = (1, 2, 3, 4) + else: + raise ValueError(f"unsupported image size: {image_size}") + else: + channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) + + attention_ds = [] + for res in attention_resolutions.split(","): + attention_ds.append(image_size // int(res)) + + return UNetModel( + image_size=image_size, + in_channels=3, + model_channels=num_channels, + out_channels=(3 if not learn_sigma else 6), + num_res_blocks=num_res_blocks, + attention_resolutions=tuple(attention_ds), + dropout=dropout, + channel_mult=channel_mult, + num_classes=(NUM_CLASSES if class_cond else None), + use_checkpoint=use_checkpoint, + use_fp16=use_fp16, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + resblock_updown=resblock_updown, + use_new_attention_order=use_new_attention_order, + ) + + +def create_gaussian_diffusion( + *, + steps=1000, + learn_sigma=False, + sigma_small=False, + noise_schedule="linear", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + timestep_respacing="", +): + betas = gd.get_named_beta_schedule(noise_schedule, steps) + if 
use_kl: + loss_type = gd.LossType.RESCALED_KL + elif rescale_learned_sigmas: + loss_type = gd.LossType.RESCALED_MSE + else: + loss_type = gd.LossType.MSE + if not timestep_respacing: + timestep_respacing = [steps] + return SpacedDiffusion( + use_timesteps=space_timesteps(steps, timestep_respacing), + betas=betas, + model_mean_type=(gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X), + model_var_type=((gd.ModelVarType.FIXED_LARGE if not sigma_small else gd.ModelVarType.FIXED_SMALL) + if not learn_sigma else gd.ModelVarType.LEARNED_RANGE), + loss_type=loss_type, + rescale_timesteps=rescale_timesteps, + ) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/sec_diff.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/sec_diff.py new file mode 100755 index 000000000..1e361f18f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/sec_diff.py @@ -0,0 +1,135 @@ +''' +This code is rewritten by Paddle based on +https://github.com/jina-ai/discoart/blob/main/discoart/nn/sec_diff.py +''' +import math +from dataclasses import dataclass +from functools import partial + +import paddle +import paddle.nn as nn + + +@dataclass +class DiffusionOutput: + v: paddle.Tensor + pred: paddle.Tensor + eps: paddle.Tensor + + +class SkipBlock(nn.Layer): + + def __init__(self, main, skip=None): + super().__init__() + self.main = nn.Sequential(*main) + self.skip = skip if skip else nn.Identity() + + def forward(self, input): + return paddle.concat([self.main(input), self.skip(input)], axis=1) + + +def append_dims(x, n): + return x[(Ellipsis, *(None, ) * (n - x.ndim))] + + +def expand_to_planes(x, shape): + return paddle.tile(append_dims(x, len(shape)), [1, 1, *shape[2:]]) + + +def alpha_sigma_to_t(alpha, sigma): + return paddle.atan2(sigma, alpha) * 2 / math.pi + + +def t_to_alpha_sigma(t): + return paddle.cos(t * math.pi / 2), paddle.sin(t * math.pi / 2) + + +class SecondaryDiffusionImageNet2(nn.Layer): + + def __init__(self): + super().__init__() + c = 64 # The base channel count + cs = [c, c * 2, c * 2, c * 4, c * 4, c * 8] + + self.timestep_embed = FourierFeatures(1, 16) + self.down = nn.AvgPool2D(2) + self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + + self.net = nn.Sequential( + ConvBlock(3 + 16, cs[0]), + ConvBlock(cs[0], cs[0]), + SkipBlock([ + self.down, + ConvBlock(cs[0], cs[1]), + ConvBlock(cs[1], cs[1]), + SkipBlock([ + self.down, + ConvBlock(cs[1], cs[2]), + ConvBlock(cs[2], cs[2]), + SkipBlock([ + self.down, + ConvBlock(cs[2], cs[3]), + ConvBlock(cs[3], cs[3]), + SkipBlock([ + self.down, + ConvBlock(cs[3], cs[4]), + ConvBlock(cs[4], cs[4]), + SkipBlock([ + self.down, + ConvBlock(cs[4], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[4]), + self.up, + ]), + ConvBlock(cs[4] * 2, cs[4]), + ConvBlock(cs[4], cs[3]), + self.up, + ]), + ConvBlock(cs[3] * 2, cs[3]), + ConvBlock(cs[3], cs[2]), + self.up, + ]), + ConvBlock(cs[2] * 2, cs[2]), + ConvBlock(cs[2], cs[1]), + self.up, + ]), + ConvBlock(cs[1] * 2, cs[1]), + ConvBlock(cs[1], cs[0]), + self.up, + ]), + ConvBlock(cs[0] * 2, cs[0]), + nn.Conv2D(cs[0], 3, 3, padding=1), + ) + + def forward(self, input, t): + timestep_embed = expand_to_planes(self.timestep_embed(t[:, None]), input.shape) + v = self.net(paddle.concat([input, timestep_embed], axis=1)) + alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t)) + pred = input * alphas - 
v * sigmas + eps = input * sigmas + v * alphas + return DiffusionOutput(v, pred, eps) + + +class FourierFeatures(nn.Layer): + + def __init__(self, in_features, out_features, std=1.0): + super().__init__() + assert out_features % 2 == 0 + # self.weight = nn.Parameter(paddle.randn([out_features // 2, in_features]) * std) + self.weight = paddle.create_parameter([out_features // 2, in_features], + dtype='float32', + default_initializer=nn.initializer.Normal(mean=0.0, std=std)) + + def forward(self, input): + f = 2 * math.pi * input @ self.weight.T + return paddle.concat([f.cos(), f.sin()], axis=-1) + + +class ConvBlock(nn.Sequential): + + def __init__(self, c_in, c_out): + super().__init__( + nn.Conv2D(c_in, c_out, 3, padding=1), + nn.ReLU(), + ) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/transforms.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/transforms.py new file mode 100755 index 000000000..e0b620b01 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/transforms.py @@ -0,0 +1,757 @@ +''' +This code is rewritten by Paddle based on +https://github.com/pytorch/vision/blob/main/torchvision/transforms/transforms.py +''' +import math +import numbers +import warnings +from enum import Enum +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn.functional import grid_sample +from paddle.vision import transforms as T + + +class Normalize(nn.Layer): + + def __init__(self, mean, std): + super(Normalize, self).__init__() + self.mean = paddle.to_tensor(mean) + self.std = paddle.to_tensor(std) + + def forward(self, tensor: Tensor): + dtype = tensor.dtype + mean = paddle.to_tensor(self.mean, dtype=dtype) + std = paddle.to_tensor(self.std, dtype=dtype) + mean = mean.reshape([1, -1, 1, 1]) + std = std.reshape([1, -1, 1, 1]) + result = tensor.subtract(mean).divide(std) + return result + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. 
+ """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +class Grayscale(nn.Layer): + + def __init__(self, num_output_channels): + super(Grayscale, self).__init__() + self.num_output_channels = num_output_channels + + def forward(self, x): + output = (0.2989 * x[:, 0:1, :, :] + 0.587 * x[:, 1:2, :, :] + 0.114 * x[:, 2:3, :, :]) + if self.num_output_channels == 3: + return output.expand(x.shape) + + return output + + +class Lambda(nn.Layer): + + def __init__(self, func): + super(Lambda, self).__init__() + self.transform = func + + def forward(self, x): + return self.transform(x) + + +class RandomGrayscale(nn.Layer): + + def __init__(self, p): + super(RandomGrayscale, self).__init__() + self.prob = p + self.transform = Grayscale(3) + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return self.transform(x) + else: + return x + + +class RandomHorizontalFlip(nn.Layer): + + def __init__(self, prob): + super(RandomHorizontalFlip, self).__init__() + self.prob = prob + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return x[:, :, :, ::-1] + else: + return x + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 + return (ratio * img1 + (1.0 - ratio) * img2).clip(0, bound) + + +def trunc_div(a, b): + ipt = paddle.divide(a, b) + sign_ipt = paddle.sign(ipt) + abs_ipt = paddle.abs(ipt) + abs_ipt = paddle.floor(abs_ipt) + out = paddle.multiply(sign_ipt, abs_ipt) + return out + + +def fmod(a, b): + return a - trunc_div(a, b) * b + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(axis=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = paddle.max(img, axis=-3) + minc = paddle.min(img, axis=-3) + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = paddle.ones_like(maxc) + s = cr / paddle.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. 
+ cr_divisor = paddle.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r).cast('float32') * (bc - gc) + hg = ((maxc == g) & (maxc != r)).cast('float32') * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)).cast('float32') * (4.0 + gc - rc) + h = hr + hg + hb + h = fmod((h / 6.0 + 1.0), paddle.to_tensor(1.0)) + return paddle.stack((h, s, maxc), axis=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(axis=-3) + i = paddle.floor(h * 6.0) + f = (h * 6.0) - i + i = i.cast(dtype='int32') + + p = paddle.clip((v * (1.0 - s)), 0.0, 1.0) + q = paddle.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = paddle.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(axis=-3) == paddle.arange(6).reshape([-1, 1, 1]) + + a1 = paddle.stack((v, q, p, p, t, v), axis=-3) + a2 = paddle.stack((t, v, v, q, p, p), axis=-3) + a3 = paddle.stack((p, p, t, v, v, q), axis=-3) + a4 = paddle.stack((a1, a2, a3), axis=-4) + + return paddle.einsum("...ijk, ...xijk -> ...xjk", mask.cast(dtype=img.dtype), a4) + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + return _blend(img, paddle.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + c = img.shape[1] + + if c == 3: + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + mean = paddle.mean(output, axis=(-3, -2, -1), keepdim=True) + + else: + mean = paddle.mean(img, axis=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + img = _rgb2hsv(img) + h, s, v = img.unbind(axis=-3) + h = fmod(h + hue_factor, paddle.to_tensor(1.0)) + img = paddle.stack((h, s, v), axis=-3) + img_hue_adj = _hsv2rgb(img) + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + + return _blend(img, output, saturation_factor) + + +class ColorJitter(nn.Layer): + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super(ColorJitter, self).__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be 
a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = paddle.randperm(4) + + b = None if brightness is None else paddle.empty([1]).uniform_(brightness[0], brightness[1]) + c = None if contrast is None else paddle.empty([1]).uniform_(contrast[0], contrast[1]) + s = None if saturation is None else paddle.empty([1]).uniform_(saturation[0], saturation[1]) + h = None if hue is None else paddle.empty([1]).uniform_(hue[0], hue[1]) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = adjust_hue(img, hue_factor) + + return img + + def __repr__(self) -> str: + s = (f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})") + return s + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand([img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + img = paddle.concat((img, dummy), axis=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = paddle.to_tensor(fill, dtype=img.dtype).reshape([1, len_fill, 1, 1]).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = paddle.empty([1, oh, ow, 3], dtype=theta.dtype) + x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, num=ow) + base_grid[..., 0] = (x_grid) + y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, num=oh).unsqueeze_(-1) + base_grid[..., 1] = (y_grid) + base_grid[..., 2] = 1.0 + rescaled_theta = theta.transpose([0, 2, 1]) / paddle.to_tensor([0.5 * w, 0.5 * h], dtype=theta.dtype) + output_grid = base_grid.reshape([1, oh * ow, 3]).bmm(rescaled_theta) + return output_grid.reshape([1, oh, ow, 2]) + + +def affine_impl(img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + fill: Optional[List[float]] = None) -> Tensor: + theta = paddle.to_tensor(matrix, dtype=img.dtype).reshape([1, 2, 3]) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_inverse_affine_matrix(center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], + inverted: bool = True) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # Pillow 
requires inverse affine transformation matrix: + # Affine matrix is : M = T * C * RotateScaleShear * C^-1 + # + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RotateScaleShear is rotation with scale and shear matrix + # + # RotateScaleShear(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + if inverted: + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + else: + matrix = [a, b, 0.0, c, d, 0.0] + matrix = [x * scale for x in matrix] + # Apply inverse of center translation: RSS * C^-1 + matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) + matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) + # Apply translation and center : T * C * RSS * C^-1 + matrix[2] += cx + tx + matrix[5] += cy + ty + + return matrix + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, + center: Optional[List[int]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + + Returns: + PIL Image or Tensor: Transformed image. + """ + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + center_f = [0.0, 0.0] + if center is not None: + _, height, width = img.shape[0], img.shape[1], img.shape[2] + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
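+        # center_f = center - 0.5 * (width, height): the offset of the requested center
+        # from the geometric image center, in pixels.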
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) + return affine_impl(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +def _check_sequence_input(x, name, req_sizes): + msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + if not isinstance(x, Sequence): + raise TypeError(f"{name} should be a sequence of length {msg}.") + if len(x) not in req_sizes: + raise ValueError(f"{name} should be sequence of length {msg}.") + + +def _setup_angle(x, name, req_sizes=(2, )): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError(f"If {name} is a single number, it must be positive.") + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x] + + +class RandomAffine(nn.Layer): + """Random affine transformation of the image keeping center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. 
warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + center=None, + ): + super(RandomAffine, self).__init__() + if resample is not None: + warnings.warn("The parameter 'resample' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'interpolation' instead.") + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2, )) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2, )) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2, )) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2, )) + + self.center = center + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(paddle.empty([1]).uniform_(float(degrees[0]), float(degrees[1]))) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(float(paddle.empty([1]).uniform_(-max_dx, max_dx))) + ty = int(float(paddle.empty([1]).uniform_(-max_dy, max_dy))) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(paddle.empty([1]).uniform_(scale_ranges[0], scale_ranges[1])) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(paddle.empty([1]).uniform_(shears[0], shears[1])) + if len(shears) == 4: + shear_y = float(paddle.empty([1]).uniform_(shears[2], shears[3])) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + fill = self.fill + channels, height, width = img.shape[1], 
img.shape[2], img.shape[3] + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + else: + fill = [float(f) for f in fill] + + img_size = [width, height] # flip for keeping BC on get_params call + + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) + + return affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(degrees={self.degrees}" + s += f", translate={self.translate}" if self.translate is not None else "" + s += f", scale={self.scale}" if self.scale is not None else "" + s += f", shear={self.shear}" if self.shear is not None else "" + s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else "" + s += f", fill={self.fill}" if self.fill != 0 else "" + s += f", center={self.center}" if self.center is not None else "" + s += ")" + + return s diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/unet.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/unet.py new file mode 100755 index 000000000..56f3ad61e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/model/unet.py @@ -0,0 +1,838 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +''' +import math +from abc import abstractmethod + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from .nn import avg_pool_nd +from .nn import checkpoint +from .nn import conv_nd +from .nn import linear +from .nn import normalization +from .nn import SiLU +from .nn import timestep_embedding +from .nn import zero_module + + +class AttentionPool2d(nn.Layer): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, + spacial_dim: int, + embed_dim: int, + num_heads_channels: int, + output_dim: int = None, + ): + super().__init__() + # self.positional_embedding = nn.Parameter( + # th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5 + # ) + positional_embedding = self.create_parameter(paddle.randn(embed_dim, spacial_dim**2 + 1) / embed_dim**0.5) + self.add_parameter("positional_embedding", positional_embedding) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + # x = x.reshape(b, c, -1) # NC(HW) + x = paddle.reshape(x, [b, c, -1]) + x = paddle.concat([x.mean(dim=-1, keepdim=True), x], axis=-1) # NC(HW+1) + x = x + paddle.cast(self.positional_embedding[None, :, :], x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Layer): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. 
+ """ + + def forward(self, x, emb): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + else: + x = layer(x) + return x + + +class Upsample(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + emb_out = paddle.cast(emb_out, h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = paddle.chunk(emb_out, 2, axis=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x, ), self.parameters(), self.use_checkpoint) + + def _forward(self, x): + b, c, *spatial = x.shape + # x = x.reshape(b, c, -1) + x = paddle.reshape(x, [b, c, -1]) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + # return (x + h).reshape(b, c, *spatial) + return paddle.reshape(x + h, [b, c, *spatial]) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += paddle.to_tensor([matmul_ops], dtype='float64') + + +class QKVAttentionLegacy(nn.Layer): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + # q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + q, k, v = paddle.reshape(qkv, [bs * self.n_heads, ch * 3, length]).split(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Layer): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Layer): + """ + The full UNet model with attention and timestep embedding. + + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + ch = input_ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.LayerList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=int(model_channels * mult), + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(model_channels * mult) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + SiLU(), + zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)), + ) + + def forward(self, x, timesteps, y=None): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == (self.num_classes + is not None), "must specify y if and only if the model is class-conditional" + + hs = [] + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + if self.num_classes is not None: + assert y.shape == (x.shape[0], ) + emb = emb + self.label_emb(y) + + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + hs.append(h) + h = self.middle_block(h, emb) + for module in self.output_blocks: + h = paddle.concat([h, hs.pop()], axis=1) + h = module(h, emb) + # h = paddle.cast(h, x.dtype) + return self.out(h) + + +class SuperResModel(UNetModel): + """ + A UNetModel that performs super-resolution. + + Expects an extra kwarg `low_res` to condition on a low-resolution image. + """ + + def __init__(self, image_size, in_channels, *args, **kwargs): + super().__init__(image_size, in_channels * 2, *args, **kwargs) + + def forward(self, x, timesteps, low_res=None, **kwargs): + _, _, new_height, new_width = x.shape + upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear") + x = paddle.concat([x, upsampled], axis=1) + return super().forward(x, timesteps, **kwargs) + + +class EncoderUNetModel(nn.Layer): + """ + The half UNet model with attention and timestep embedding. + + For usage, see UNet. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + SiLU(), + nn.AdaptiveAvgPool2D((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + SiLU(), + AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + 
nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + # h = x.type(self.dtype) + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + # results.append(h.type(x.dtype).mean(axis=(2, 3))) + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = paddle.concat(results, axis=-1) + return self.out(h) + else: + # h = h.type(x.dtype) + h = paddle.cast(h, x.dtype) + return self.out(h) diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/default.yml b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/default.yml new file mode 100755 index 000000000..3a161f169 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/default.yml @@ -0,0 +1,45 @@ +text_prompts: + - greg rutkowski和thomas kinkade在artstation上的一幅美丽的画,一个独特的灯塔,照耀着它的光穿过喧嚣的血海。 + +init_image: +width_height: [ 1280, 768] + +skip_steps: 10 +steps: 250 + +cut_ic_pow: 1 +init_scale: 1000 +clip_guidance_scale: 5000 + +tv_scale: 0 +range_scale: 150 +sat_scale: 0 +cutn_batches: 4 + +diffusion_model: 512x512_diffusion_uncond_finetune_008100 +use_secondary_model: True +diffusion_sampling_mode: ddim + +perlin_init: False +perlin_mode: mixed +seed: 445467575 +eta: 0.8 +clamp_grad: True +clamp_max: 0.05 + +randomize_class: True +clip_denoised: False +fuzzy_prompt: False +rand_mag: 0.05 + +cut_overview: "[12]*400+[4]*600" +cut_innercut: "[4]*400+[12]*600" +cut_icgray_p: "[0.2]*400+[0]*600" + +display_rate: 10 +n_batches: 1 +batch_size: 1 +batch_name: '' +clip_models: + - ViTB16 +output_dir: "./" diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/docstrings.yml b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/docstrings.yml new file mode 100755 index 000000000..702015e1c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/resources/docstrings.yml @@ -0,0 +1,103 @@ +text_prompts: | + Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. + Developing text prompts takes practice and experience, and is not the subject of this guide. 
If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. +init_image: | + Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. + If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. +width_height: | + Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + +skip_steps: | + Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps. + As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases. + The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times. + If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily. + Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems. + Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. + However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + +steps: | + When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step. + Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. 
However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. + Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + +cut_ic_pow: | + This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +init_scale: | + This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. +clip_guidance_scale: | + CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. + Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. + Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. +tv_scale: | + Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising +range_scale: | + Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + +sat_scale: | + Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. +cutn_batches: | + Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. + Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. + At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. + However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. 
Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image. + So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + +diffusion_model: Diffusion_model of choice. + +use_secondary_model: | + Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + +diffusion_sampling_mode: | + Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + +perlin_init: | + Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). + Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + +perlin_mode: | + sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. +seed: | + Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. + After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. +eta: | + eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. 
As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. + The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. +clamp_grad: | + As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. +clamp_max: | + Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + +randomize_class: +clip_denoised: False +fuzzy_prompt: | + Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. +rand_mag: | + Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + +cut_overview: The schedule of overview cuts +cut_innercut: The schedule of inner cuts +cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +display_rate: | + During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. +n_batches: | + This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. +batch_name: | + The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. +clip_models: | + CLIP Model selectors. ViT-B/32, ViT-B/16, ViT-L/14, RN101, RN50, RN50x4, RN50x16, RN50x64. + These various CLIP models are available for you to use during image generation. Models have different styles or ‘flavors,’ so look around. + You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash. + The rough order of speed/mem usage is (smallest/fastest to largest/slowest): + ViT-B/32 + RN50 + RN101 + ViT-B/16 + RN50x4 + RN50x16 + RN50x64 + ViT-L/14 + For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. 
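A minimal sketch (illustrative only, not part of this patch) of the cut arithmetic described above: the schedule strings are Python expressions that expand to 1000-element lists indexed by timestep, and cutn_batches multiplies the scheduled cuts without raising peak memory, mirroring the eval-and-index logic in the runner below. The timestep value used here is hypothetical.

    cut_overview = eval("[12]*400+[4]*600")    # overview cuts scheduled for indices 0..999
    cut_innercut = eval("[4]*400+[12]*600")    # inner cuts scheduled for indices 0..999
    cutn_batches = 4                           # sequential batches of cuts per timestep

    t = 850                                    # hypothetical timestep, counting down from 1000
    scheduled = cut_overview[1000 - t] + cut_innercut[1000 - t]   # 12 + 4 = 16 cuts per batch
    total_per_timestep = scheduled * cutn_batches                 # 16 * 4 = 64 cuts in total
    print(scheduled, total_per_timestep)       # memory peaks at 16 cuts; quality reflects 64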
diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/runner.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/runner.py new file mode 100755 index 000000000..58a0c23a0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/reverse_diffusion/runner.py @@ -0,0 +1,285 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/runner.py +''' +import gc +import os +import random +from threading import Thread + +import disco_diffusion_cnclip_vitb16.cn_clip.clip as clip +import numpy as np +import paddle +import paddle.vision.transforms as T +import paddle_lpips as lpips +from docarray import Document +from docarray import DocumentArray +from IPython import display +from ipywidgets import Output +from PIL import Image + +from .helper import logger +from .helper import parse_prompt +from .model.losses import range_loss +from .model.losses import spherical_dist_loss +from .model.losses import tv_loss +from .model.make_cutouts import MakeCutoutsDango +from .model.sec_diff import alpha_sigma_to_t +from .model.sec_diff import SecondaryDiffusionImageNet2 +from .model.transforms import Normalize + + +def do_run(args, models) -> 'DocumentArray': + logger.info('preparing models...') + model, diffusion, clip_models, secondary_model = models + normalize = Normalize( + mean=[0.48145466, 0.4578275, 0.40821073], + std=[0.26862954, 0.26130258, 0.27577711], + ) + lpips_model = lpips.LPIPS(net='vgg') + for parameter in lpips_model.parameters(): + parameter.stop_gradient = True + side_x = (args.width_height[0] // 64) * 64 + side_y = (args.width_height[1] // 64) * 64 + cut_overview = eval(args.cut_overview) + cut_innercut = eval(args.cut_innercut) + cut_icgray_p = eval(args.cut_icgray_p) + + from .model.perlin_noises import create_perlin_noise, regen_perlin + + seed = args.seed + + skip_steps = args.skip_steps + + loss_values = [] + + if seed is not None: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + model_stats = [] + for clip_model in clip_models: + model_stat = { + 'clip_model': None, + 'target_embeds': [], + 'make_cutouts': None, + 'weights': [], + } + model_stat['clip_model'] = clip_model + + if isinstance(args.text_prompts, str): + args.text_prompts = [args.text_prompts] + + for prompt in args.text_prompts: + txt, weight = parse_prompt(prompt) + txt = clip_model.encode_text(clip.tokenize(prompt)) + if args.fuzzy_prompt: + for i in range(25): + model_stat['target_embeds'].append((txt + paddle.randn(txt.shape) * args.rand_mag).clip(0, 1)) + model_stat['weights'].append(weight) + else: + model_stat['target_embeds'].append(txt) + model_stat['weights'].append(weight) + + model_stat['target_embeds'] = paddle.concat(model_stat['target_embeds']) + model_stat['weights'] = paddle.to_tensor(model_stat['weights']) + if model_stat['weights'].sum().abs() < 1e-3: + raise RuntimeError('The weights must not sum to 0.') + model_stat['weights'] /= model_stat['weights'].sum().abs() + model_stats.append(model_stat) + + init = None + if args.init_image: + d = Document(uri=args.init_image).load_uri_to_image_tensor(side_x, side_y) + init = T.to_tensor(d.tensor).unsqueeze(0) * 2 - 1 + + if args.perlin_init: + if args.perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif args.perlin_mode == 'gray': 
+ init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + init = (T.to_tensor(init).add(T.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + + cur_t = None + + def cond_fn(x, t, y=None): + x_is_NaN = False + n = x.shape[0] + if secondary_model: + alpha = paddle.to_tensor(diffusion.sqrt_alphas_cumprod[cur_t], dtype='float32') + sigma = paddle.to_tensor(diffusion.sqrt_one_minus_alphas_cumprod[cur_t], dtype='float32') + cosine_t = alpha_sigma_to_t(alpha, sigma) + x = paddle.to_tensor(x.detach(), dtype='float32') + x.stop_gradient = False + cosine_t = paddle.tile(paddle.to_tensor(cosine_t.detach().cpu().numpy()), [n]) + cosine_t.stop_gradient = False + out = secondary_model(x, cosine_t).pred + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + else: + t = paddle.ones([n], dtype='int64') * cur_t + out = diffusion.p_mean_variance(model, x, t, clip_denoised=False, model_kwargs={'y': y}) + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out['pred_xstart'] * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + for model_stat in model_stats: + for i in range(args.cutn_batches): + t_int = (int(t.item()) + 1) # errors on last step without +1, need to find source + # when using SLIP Base model the dimensions need to be hard coded to avoid AttributeError: 'VisionTransformer' object has no attribute 'input_resolution' + try: + input_resolution = model_stat['clip_model'].visual.input_resolution + except: + input_resolution = 224 + + cuts = MakeCutoutsDango( + input_resolution, + Overview=cut_overview[1000 - t_int], + InnerCrop=cut_innercut[1000 - t_int], + IC_Size_Pow=args.cut_ic_pow, + IC_Grey_P=cut_icgray_p[1000 - t_int], + ) + clip_in = normalize(cuts(x_in.add(paddle.to_tensor(1.0)).divide(paddle.to_tensor(2.0)))) + image_embeds = (model_stat['clip_model'].encode_image(clip_in)) + + dists = spherical_dist_loss( + image_embeds.unsqueeze(1), + model_stat['target_embeds'].unsqueeze(0), + ) + + dists = dists.reshape([ + cut_overview[1000 - t_int] + cut_innercut[1000 - t_int], + n, + -1, + ]) + losses = dists.multiply(model_stat['weights']).sum(2).mean(0) + loss_values.append(losses.sum().item()) # log loss, probably shouldn't do per cutn_batch + + x_in_grad += (paddle.grad(losses.sum() * args.clip_guidance_scale, x_in)[0] / args.cutn_batches) + tv_losses = tv_loss(x_in) + range_losses = range_loss(x_in) + sat_losses = paddle.abs(x_in - x_in.clip(min=-1, max=1)).mean() + loss = (tv_losses.sum() * args.tv_scale + range_losses.sum() * args.range_scale + + sat_losses.sum() * args.sat_scale) + if init is not None and args.init_scale: + init_losses = lpips_model(x_in, init) + loss = loss + init_losses.sum() * args.init_scale + x_in_grad += paddle.grad(loss, x_in)[0] + if not paddle.isnan(x_in_grad).any(): + grad = -paddle.grad(x_in_d, x, x_in_grad)[0] + else: + x_is_NaN = True + grad = paddle.zeros_like(x) + if args.clamp_grad and not x_is_NaN: + magnitude = grad.square().mean().sqrt() + return (grad * 
magnitude.clip(max=args.clamp_max) / magnitude) + return grad + + if args.diffusion_sampling_mode == 'ddim': + sample_fn = diffusion.ddim_sample_loop_progressive + else: + sample_fn = diffusion.plms_sample_loop_progressive + + logger.info('creating artwork...') + + image_display = Output() + da_batches = DocumentArray() + + for _nb in range(args.n_batches): + display.clear_output(wait=True) + display.display(args.name_docarray, image_display) + gc.collect() + paddle.device.cuda.empty_cache() + + d = Document(tags=vars(args)) + da_batches.append(d) + + cur_t = diffusion.num_timesteps - skip_steps - 1 + + if args.perlin_init: + init = regen_perlin(args.perlin_mode, side_y, side_x, args.batch_size) + + if args.diffusion_sampling_mode == 'ddim': + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + eta=args.eta, + ) + else: + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + order=2, + ) + + threads = [] + for j, sample in enumerate(samples): + cur_t -= 1 + with image_display: + if j % args.display_rate == 0 or cur_t == -1: + for _, image in enumerate(sample['pred_xstart']): + image = (image + 1) / 2 + image = image.clip(0, 1).squeeze().transpose([1, 2, 0]).numpy() * 255 + image = np.uint8(image) + image = Image.fromarray(image) + + image.save(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb))) + c = Document(tags={'cur_t': cur_t}) + c.load_pil_image_to_datauri(image) + d.chunks.append(c) + display.clear_output(wait=True) + display.display(display.Image(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb)))) + d.chunks.plot_image_sprites(os.path.join(args.output_dir, + f'{args.name_docarray}-progress-{_nb}.png'), + show_index=True) + t = Thread( + target=_silent_push, + args=( + da_batches, + args.name_docarray, + ), + ) + threads.append(t) + t.start() + + if cur_t == -1: + d.load_pil_image_to_datauri(image) + + for t in threads: + t.join() + display.clear_output(wait=True) + logger.info(f'done! 
{args.name_docarray}') + da_batches.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + return da_batches + + +def _silent_push(da_batches: DocumentArray, name: str) -> None: + try: + da_batches.push(name) + except Exception as ex: + logger.debug(f'push failed: {ex}') From fc35d0c8c2cbd9cad5cbd79cd201b6e324302f5f Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:19:39 +0800 Subject: [PATCH 029/117] Add disco_diffusion_clip_vitb32 model --- .../disco_diffusion_clip_vitb32/README.md | 114 ++ .../clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../clip/clip/model.py | 227 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../clip/clip/utils.py | 122 ++ .../disco_diffusion_clip_vitb32/module.py | 441 ++++++ .../requirements.txt | 8 + .../resize_right/README.md | 3 + .../resize_right/__init__.py | 0 .../resize_right/interp_methods.py | 70 + .../resize_right/resize_right.py | 403 ++++++ .../reverse_diffusion/README.md | 2 + .../reverse_diffusion/__init__.py | 156 +++ .../reverse_diffusion/config.py | 77 ++ .../reverse_diffusion/helper.py | 137 ++ .../reverse_diffusion/model/__init__.py | 3 + .../model/gaussian_diffusion.py | 1214 +++++++++++++++++ .../reverse_diffusion/model/losses.py | 86 ++ .../reverse_diffusion/model/make_cutouts.py | 177 +++ .../reverse_diffusion/model/nn.py | 127 ++ .../reverse_diffusion/model/perlin_noises.py | 78 ++ .../reverse_diffusion/model/respace.py | 123 ++ .../reverse_diffusion/model/script_util.py | 201 +++ .../reverse_diffusion/model/sec_diff.py | 135 ++ .../reverse_diffusion/model/transforms.py | 757 ++++++++++ .../reverse_diffusion/model/unet.py | 838 ++++++++++++ .../reverse_diffusion/resources/default.yml | 47 + .../resources/docstrings.yml | 103 ++ .../reverse_diffusion/runner.py | 285 ++++ 31 files changed, 6254 insertions(+) create mode 100644 modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md create mode 100644 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/layers.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/model.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/requirements.txt create mode 100644 modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/interp_methods.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/resize_right.py create mode 100644 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/config.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/helper.py create mode 100755 
modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/gaussian_diffusion.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/losses.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/make_cutouts.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/nn.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/perlin_noises.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/respace.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/script_util.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/sec_diff.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/transforms.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/unet.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/default.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/docstrings.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/runner.py diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md new file mode 100644 index 000000000..1a42914c7 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md @@ -0,0 +1,114 @@ +# disco_diffusion_clip_vitb32 + +|模型名称|disco_diffusion_clip_vitb32| +| :--- | :---: | +|类别|图像-文图生成| +|网络|dd+clip ViTB32| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|3.1GB| +|最新更新日期|2022-08-02| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - 输出图像 +

[输出图像示例] + + - 生成过程 + + [生成过程示例] +
+ +### 模型介绍 + +disco_diffusion_clip_vitb32 是一个文图生成模型,可以通过输入一段文字来生成符合该句子语义的图像。该模型由两部分组成,一部分是扩散模型,是一种生成模型,可以从噪声输入中重建出原始图像。另一部分是多模态预训练模型(CLIP), 可以将文本和图像表示在同一个特征空间,相近语义的文本和图像在该特征空间里距离会更相近。在该文图生成模型中,扩散模型负责从初始噪声或者指定初始图像中来生成目标图像,CLIP负责引导生成图像的语义和输入的文本的语义尽可能接近,随着扩散模型在CLIP的引导下不断的迭代生成新图像,最终能够生成文本所描述内容的图像。该模块中使用的CLIP模型结构为ViTB32。 + +更多详情请参考论文:[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) 以及 [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install disco_diffusion_clip_vitb32 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run disco_diffusion_clip_vitb32 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_vitb32_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_vitb32") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # 生成图像, 默认会在disco_diffusion_clip_vitb32_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_vitb32_out/') + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_vitb32_out-result.png') + # 展示所有的中间结果 + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_vitb32_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation."。prompt的构造可以参考[网站](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#)。 + - style(Optional[str]): 指定绘画的风格,如'watercolor','Chinese painting'等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如Greg Rutkowsk、krenz,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"disco_diffusion_clip_vitb32_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 
包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install disco_diffusion_clip_vitb32 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/README.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/README.md new file mode 100644 index 000000000..317214d80 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We copy this repo here for guided diffusion. diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/layers.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in 
shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git 
a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/model.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/model.py new file mode 100755 index 000000000..63d1835c5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/model.py @@ -0,0 +1,227 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + 
out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + # print(x.shape) + + x = x + self.positional_embedding + #print(x.shape) + + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = 
self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py new file mode 100755 index 000000000..8ea909142 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py @@ -0,0 +1,122 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['RN50', 
'RN101', 'VIT32'] + +URL = { + 'RN50': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN50.pdparams'), + 'RN101': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN101.pdparams'), + 'VIT32': os.path.join(os.path.dirname(__file__), 'pre_trained', 'ViT-B-32.pdparams') +} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='VIT32'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'RN101': build_rn101_model, 'VIT32': build_vit_model, 'RN50': build_rn50_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + model.load_dict(sd) + model.eval() + return model + + +def build_vit_model(): + + model = CLIP(embed_dim=512, + image_resolution=224, + vision_layers=12, + vision_width=768, + vision_patch_size=32, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn101_model(): + model = CLIP( + embed_dim=512, + image_resolution=224, + vision_layers=(3, 4, 23, 3), + vision_width=64, + vision_patch_size=0, #Not used in resnet + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn50_model(): + model = CLIP(embed_dim=1024, + image_resolution=224, + vision_layers=(3, 4, 6, 3), + vision_width=64, + vision_patch_size=None, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py new file mode 100755 index 000000000..fb025bfc9 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py @@ -0,0 +1,441 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import ast +import os +import sys +from functools import partial +from typing import List +from typing import Optional + +import disco_diffusion_clip_vitb32.clip as clip +import disco_diffusion_clip_vitb32.resize_right as resize_right +import paddle +from disco_diffusion_clip_vitb32.reverse_diffusion import create + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="disco_diffusion_clip_vitb32", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class DiscoDiffusionClip: + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 0, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 0, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 1, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + use_gpu: Optional[bool] = True, + output_dir: Optional[str] = 'disco_diffusion_clip_vitb32_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. 
These other apps use different technologies, but many of the same principles apply. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. 
However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. 
Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. 
Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param use_gpu: whether to use gpu or not. + :return: a DocumentArray object that has `n_batches` Documents + """ + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + elif isinstance(text_prompts, list): + text_prompts[0] = text_prompts[0].rstrip(',.,。') + if style is not None: + text_prompts[0] += ",{}".format(style) + if artist is not None: + text_prompts[0] += ",{},trending on artstation".format(artist) + + return create(text_prompts=text_prompts, + init_image=init_image, + width_height=width_height, + skip_steps=skip_steps, + steps=steps, + cut_ic_pow=cut_ic_pow, + init_scale=init_scale, + clip_guidance_scale=clip_guidance_scale, + tv_scale=tv_scale, + range_scale=range_scale, + sat_scale=sat_scale, + cutn_batches=cutn_batches, + diffusion_sampling_mode=diffusion_sampling_mode, + perlin_init=perlin_init, + perlin_mode=perlin_mode, + seed=seed, + eta=eta, + clamp_grad=clamp_grad, + clamp_max=clamp_max, + randomize_class=randomize_class, + clip_denoised=clip_denoised, + fuzzy_prompt=fuzzy_prompt, + rand_mag=rand_mag, + cut_overview=cut_overview, + cut_innercut=cut_innercut, + cut_icgray_p=cut_icgray_p, + display_rate=display_rate, + n_batches=n_batches, + batch_size=batch_size, + batch_name=batch_name, + clip_models=['VIT32'], + output_dir=output_dir) + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = [] + for text_prompt in text_prompts: + result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() + results.append(result) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + init_image=args.init_image, + width_height=args.width_height, + skip_steps=args.skip_steps, + steps=args.steps, + cut_ic_pow=args.cut_ic_pow, + init_scale=args.init_scale, + clip_guidance_scale=args.clip_guidance_scale, + tv_scale=args.tv_scale, + range_scale=args.range_scale, + sat_scale=args.sat_scale, + cutn_batches=args.cutn_batches, + diffusion_sampling_mode=args.diffusion_sampling_mode, + perlin_init=args.perlin_init, + perlin_mode=args.perlin_mode, + seed=args.seed, + eta=args.eta, + clamp_grad=args.clamp_grad, + clamp_max=args.clamp_max, + randomize_class=args.randomize_class, + clip_denoised=args.clip_denoised, + fuzzy_prompt=args.fuzzy_prompt, + rand_mag=args.rand_mag, + cut_overview=args.cut_overview, + cut_innercut=args.cut_innercut, + cut_icgray_p=args.cut_icgray_p, + display_rate=args.display_rate, + n_batches=args.n_batches, + batch_size=args.batch_size, + batch_name=args.batch_name, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_input_group.add_argument( + '--skip_steps', + type=int, + default=0, + help= + 'Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15%% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50%% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture' + ) + self.arg_input_group.add_argument( + '--steps', + type=int, + default=250, + help= + "When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time." + ) + self.arg_input_group.add_argument( + '--cut_ic_pow', + type=int, + default=1, + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--init_scale', + type=int, + default=1000, + help= + "This controls how strongly CLIP will try to match the init_image provided. 
This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost." + ) + self.arg_input_group.add_argument( + '--clip_guidance_scale', + type=int, + default=5000, + help= + "CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well." + ) + self.arg_input_group.add_argument( + '--tv_scale', + type=int, + default=0, + help= + "Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising" + ) + self.arg_input_group.add_argument( + '--range_scale', + type=int, + default=0, + help= + "Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images." + ) + self.arg_input_group.add_argument( + '--sat_scale', + type=int, + default=0, + help= + "Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation." + ) + self.arg_input_group.add_argument( + '--cutn_batches', + type=int, + default=4, + help= + "Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. 
DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below." + ) + self.arg_input_group.add_argument( + '--diffusion_sampling_mode', + type=str, + default='ddim', + help= + "Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord." + ) + self.arg_input_group.add_argument( + '--perlin_init', + type=bool, + default=False, + help= + "Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively." + ) + self.arg_input_group.add_argument( + '--perlin_mode', + type=str, + default='mixed', + help= + "sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects." + ) + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical." + ) + self.arg_input_group.add_argument( + '--eta', + type=float, + default=0.8, + help= + "eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects." 
+ ) + self.arg_input_group.add_argument( + '--clamp_grad', + type=bool, + default=True, + help= + "As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced." + ) + self.arg_input_group.add_argument( + '--clamp_max', + type=float, + default=0.05, + help= + "Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy." + ) + self.arg_input_group.add_argument('--randomize_class', type=bool, default=True, help="Random class.") + self.arg_input_group.add_argument('--clip_denoised', type=bool, default=False, help="Clip denoised.") + self.arg_input_group.add_argument( + '--fuzzy_prompt', + type=bool, + default=False, + help= + "Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this." + ) + self.arg_input_group.add_argument( + '--rand_mag', + type=float, + default=0.5, + help="Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.") + self.arg_input_group.add_argument('--cut_overview', + type=str, + default='[12]*400+[4]*600', + help="The schedule of overview cuts") + self.arg_input_group.add_argument('--cut_innercut', + type=str, + default='[4]*400+[12]*600', + help="The schedule of inner cuts") + self.arg_input_group.add_argument( + '--cut_icgray_p', + type=str, + default='[0.2]*400+[0]*600', + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help= + "During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly." + ) + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='disco_diffusion_clip_vitb32_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument( + '--init_image', + type=str, + default=None, + help= + "Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion." + ) + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[1280, 768], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--n_batches', + type=int, + default=1, + help= + "This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings." + ) + self.arg_input_group.add_argument('--batch_size', type=int, default=1, help="Batch size.") + self.arg_input_group.add_argument( + '--batch_name', + type=str, + default='', + help= + 'The name of the batch, the batch id will be named as "reverse_diffusion-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name.' 
+ ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/requirements.txt b/modules/image/text_to_image/disco_diffusion_clip_vitb32/requirements.txt new file mode 100755 index 000000000..8b4bc0ea4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/requirements.txt @@ -0,0 +1,8 @@ +numpy +paddle_lpips==0.1.2 +ftfy +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/README.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/README.md new file mode 100644 index 000000000..1f8d0bb0a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/README.md @@ -0,0 +1,3 @@ +# ResizeRight (Paddle) +Fully differentiable resize function implemented by Paddle. +This module is based on [assafshocher/ResizeRight](https://github.com/assafshocher/ResizeRight). diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/interp_methods.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/interp_methods.py new file mode 100755 index 000000000..276eb055a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/interp_methods.py @@ -0,0 +1,70 @@ +from math import pi + +try: + import paddle +except ImportError: + paddle = None + +try: + import numpy + import numpy as np +except ImportError: + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def set_framework_dependencies(x): + if type(x) is numpy.ndarray: + to_dtype = lambda a: a + fw = numpy + else: + to_dtype = lambda a: paddle.cast(a, x.dtype) + fw = paddle + # eps = fw.finfo(fw.float32).eps + eps = paddle.to_tensor(np.finfo(np.float32).eps) + return fw, to_dtype, eps + + +def support_sz(sz): + + def wrapper(f): + f.support_sz = sz + return f + + return wrapper + + +@support_sz(4) +def cubic(x): + fw, to_dtype, eps = set_framework_dependencies(x) + absx = fw.abs(x) + absx2 = absx**2 + absx3 = absx**3 + return ((1.5 * absx3 - 2.5 * absx2 + 1.) * to_dtype(absx <= 1.) + + (-0.5 * absx3 + 2.5 * absx2 - 4. * absx + 2.) * to_dtype((1. 
< absx) & (absx <= 2.))) + + +@support_sz(4) +def lanczos2(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 2) + eps) / ((pi**2 * x**2 / 2) + eps)) * to_dtype(abs(x) < 2)) + + +@support_sz(6) +def lanczos3(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 3) + eps) / ((pi**2 * x**2 / 3) + eps)) * to_dtype(abs(x) < 3)) + + +@support_sz(2) +def linear(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return ((x + 1) * to_dtype((-1 <= x) & (x < 0)) + (1 - x) * to_dtype((0 <= x) & (x <= 1))) + + +@support_sz(1) +def box(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return to_dtype((-1 <= x) & (x < 0)) + to_dtype((0 <= x) & (x <= 1)) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/resize_right.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/resize_right.py new file mode 100755 index 000000000..77ea95640 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/resize_right/resize_right.py @@ -0,0 +1,403 @@ +import warnings +from fractions import Fraction +from math import ceil +from typing import Tuple + +import disco_diffusion_clip_vitb32.resize_right.interp_methods as interp_methods + + +class NoneClass: + pass + + +try: + import paddle + from paddle import nn + nnModuleWrapped = nn.Layer +except ImportError: + warnings.warn('No PyTorch found, will work only with Numpy') + paddle = None + nnModuleWrapped = NoneClass + +try: + import numpy + import numpy as np +except ImportError: + warnings.warn('No Numpy found, will work only with PyTorch') + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def resize(input, + scale_factors=None, + out_shape=None, + interp_method=interp_methods.cubic, + support_sz=None, + antialiasing=True, + by_convs=False, + scale_tolerance=None, + max_numerator=10, + pad_mode='constant'): + # get properties of the input tensor + in_shape, n_dims = input.shape, input.ndim + + # fw stands for framework that can be either numpy or paddle, + # determined by the input type + fw = numpy if type(input) is numpy.ndarray else paddle + eps = np.finfo(np.float32).eps if fw == numpy else paddle.to_tensor(np.finfo(np.float32).eps) + device = input.place if fw is paddle else None + + # set missing scale factors or output shapem one according to another, + # scream if both missing. this is also where all the defults policies + # take place. also handling the by_convs attribute carefully. + scale_factors, out_shape, by_convs = set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, + scale_tolerance, max_numerator, eps, fw) + + # sort indices of dimensions according to scale of each dimension. + # since we are going dim by dim this is efficient + sorted_filtered_dims_and_scales = [(dim, scale_factors[dim], by_convs[dim], in_shape[dim], out_shape[dim]) + for dim in sorted(range(n_dims), key=lambda ind: scale_factors[ind]) + if scale_factors[dim] != 1.] 
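+ # A hypothetical example of the ordering above (illustrative values, not from the original code):
+ # resizing an NCHW tensor of shape (1, 3, 64, 96) with scale_factors=[1, 1, 0.5, 2] leaves the
+ # filtered, sorted pairs (dim=2, scale=0.5, in_sz=64, out_sz=32) and (dim=3, scale=2, in_sz=96,
+ # out_sz=192): downscaled dims are handled first, and dims whose scale factor is exactly 1 are
+ # skipped entirely.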
+ # unless support size is specified by the user, it is an attribute + # of the interpolation method + if support_sz is None: + support_sz = interp_method.support_sz + + # output begins identical to input and changes with each iteration + output = input + + # iterate over dims + for (dim, scale_factor, dim_by_convs, in_sz, out_sz) in sorted_filtered_dims_and_scales: + # STEP 1- PROJECTED GRID: The non-integer locations of the projection + # of output pixel locations to the input tensor + projected_grid = get_projected_grid(in_sz, out_sz, scale_factor, fw, dim_by_convs, device) + + # STEP 1.5: ANTIALIASING- If antialiasing is taking place, we modify + # the window size and the interpolation method (see inside function) + cur_interp_method, cur_support_sz = apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, + antialiasing) + + # STEP 2- FIELDS OF VIEW: for each output pixels, map the input pixels + # that influence it. Also calculate needed padding and update grid + # accoedingly + field_of_view = get_field_of_view(projected_grid, cur_support_sz, fw, eps, device) + + # STEP 2.5- CALCULATE PAD AND UPDATE: according to the field of view, + # the input should be padded to handle the boundaries, coordinates + # should be updated. actual padding only occurs when weights are + # aplied (step 4). if using by_convs for this dim, then we need to + # calc right and left boundaries for each filter instead. + pad_sz, projected_grid, field_of_view = calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, + dim_by_convs, fw, device) + # STEP 3- CALCULATE WEIGHTS: Match a set of weights to the pixels in + # the field of view for each output pixel + weights = get_weights(cur_interp_method, projected_grid, field_of_view) + + # STEP 4- APPLY WEIGHTS: Each output pixel is calculated by multiplying + # its set of weights with the pixel values in its field of view. + # We now multiply the fields of view with their matching weights. + # We do this by tensor multiplication and broadcasting. + # if by_convs is true for this dim, then we do this action by + # convolutions. this is equivalent but faster. + if not dim_by_convs: + output = apply_weights(output, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw) + else: + output = apply_convs(output, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw) + return output + + +def get_projected_grid(in_sz, out_sz, scale_factor, fw, by_convs, device=None): + # we start by having the ouput coordinates which are just integer locations + # in the special case when usin by_convs, we only need two cycles of grid + # points. the first and last. + grid_sz = out_sz if not by_convs else scale_factor.numerator + out_coordinates = fw_arange(grid_sz, fw, device) + + # This is projecting the ouput pixel locations in 1d to the input tensor, + # as non-integer locations. + # the following fomrula is derived in the paper + # "From Discrete to Continuous Convolutions" by Shocher et al. + return (out_coordinates / float(scale_factor) + (in_sz - 1) / 2 - (out_sz - 1) / (2 * float(scale_factor))) + + +def get_field_of_view(projected_grid, cur_support_sz, fw, eps, device): + # for each output pixel, map which input pixels influence it, in 1d. 
+ # we start by calculating the leftmost neighbor, using half of the window + # size (eps is for when boundary is exact int) + left_boundaries = fw_ceil(projected_grid - cur_support_sz / 2 - eps, fw) + + # then we simply take all the pixel centers in the field by counting + # window size pixels from the left boundary + ordinal_numbers = fw_arange(ceil(cur_support_sz - eps), fw, device) + return left_boundaries[:, None] + ordinal_numbers + + +def calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, dim_by_convs, fw, device): + if not dim_by_convs: + # determine padding according to neighbor coords out of bound. + # this is a generalized notion of padding, when pad<0 it means crop + pad_sz = [-field_of_view[0, 0].item(), field_of_view[-1, -1].item() - in_sz + 1] + + # since input image will be changed by padding, coordinates of both + # field_of_view and projected_grid need to be updated + field_of_view += pad_sz[0] + projected_grid += pad_sz[0] + + else: + # only used for by_convs, to calc the boundaries of each filter the + # number of distinct convolutions is the numerator of the scale factor + num_convs, stride = scale_factor.numerator, scale_factor.denominator + + # calculate left and right boundaries for each conv. left can also be + # negative right can be bigger than in_sz. such cases imply padding if + # needed. however if# both are in-bounds, it means we need to crop, + # practically apply the conv only on part of the image. + left_pads = -field_of_view[:, 0] + + # next calc is tricky, explanation by rows: + # 1) counting output pixels between the first position of each filter + # to the right boundary of the input + # 2) dividing it by number of filters to count how many 'jumps' + # each filter does + # 3) multiplying by the stride gives us the distance over the input + # coords done by all these jumps for each filter + # 4) to this distance we add the right boundary of the filter when + # placed in its leftmost position. so now we get the right boundary + # of that filter in input coord. + # 5) the padding size needed is obtained by subtracting the rightmost + # input coordinate. if the result is positive padding is needed. if + # negative then negative padding means shaving off pixel columns. + right_pads = (((out_sz - fw_arange(num_convs, fw, device) - 1) # (1) + // num_convs) # (2) + * stride # (3) + + field_of_view[:, -1] # (4) + - in_sz + 1) # (5) + + # in the by_convs case pad_sz is a list of left-right pairs. one per + # each filter + + pad_sz = list(zip(left_pads, right_pads)) + + return pad_sz, projected_grid, field_of_view + + +def get_weights(interp_method, projected_grid, field_of_view): + # the set of weights per each output pixels is the result of the chosen + # interpolation method applied to the distances between projected grid + # locations and the pixel-centers in the field of view (distances are + # directed, can be positive or negative) + weights = interp_method(projected_grid[:, None] - field_of_view) + + # we now carefully normalize the weights to sum to 1 per each output pixel + sum_weights = weights.sum(1, keepdim=True) + sum_weights[sum_weights == 0] = 1 + return weights / sum_weights + + +def apply_weights(input, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw): + # for this operation we assume the resized dim is the first one. 
+ # so we transpose and will transpose back after multiplying + tmp_input = fw_swapaxes(input, dim, 0, fw) + + # apply padding + tmp_input = fw_pad(tmp_input, fw, pad_sz, pad_mode) + + # field_of_view is a tensor of order 2: for each output (1d location + # along cur dim)- a list of 1d neighbors locations. + # note that this whole operations is applied to each dim separately, + # this is why it is all in 1d. + # neighbors = tmp_input[field_of_view] is a tensor of order image_dims+1: + # for each output pixel (this time indicated in all dims), these are the + # values of the neighbors in the 1d field of view. note that we only + # consider neighbors along the current dim, but such set exists for every + # multi-dim location, hence the final tensor order is image_dims+1. + paddle.device.cuda.empty_cache() + neighbors = tmp_input[field_of_view] + + # weights is an order 2 tensor: for each output location along 1d- a list + # of weights matching the field of view. we augment it with ones, for + # broadcasting, so that when multiplies some tensor the weights affect + # only its first dim. + tmp_weights = fw.reshape(weights, (*weights.shape, *[1] * (n_dims - 1))) + + # now we simply multiply the weights with the neighbors, and then sum + # along the field of view, to get a single value per out pixel + tmp_output = (neighbors * tmp_weights).sum(1) + # we transpose back the resized dim to its original position + return fw_swapaxes(tmp_output, 0, dim, fw) + + +def apply_convs(input, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw): + # for this operations we assume the resized dim is the last one. + # so we transpose and will transpose back after multiplying + input = fw_swapaxes(input, dim, -1, fw) + + # the stride for all convs is the denominator of the scale factor + stride, num_convs = scale_factor.denominator, scale_factor.numerator + + # prepare an empty tensor for the output + tmp_out_shape = list(input.shape) + tmp_out_shape[-1] = out_sz + tmp_output = fw_empty(tuple(tmp_out_shape), fw, input.device) + + # iterate over the conv operations. we have as many as the numerator + # of the scale-factor. for each we need boundaries and a filter. + for conv_ind, (pad_sz, filt) in enumerate(zip(pad_sz, weights)): + # apply padding (we pad last dim, padding can be negative) + pad_dim = input.ndim - 1 + tmp_input = fw_pad(input, fw, pad_sz, pad_mode, dim=pad_dim) + + # apply convolution over last dim. store in the output tensor with + # positional strides so that when the loop is comlete conv results are + # interwind + tmp_output[..., conv_ind::num_convs] = fw_conv(tmp_input, filt, stride) + + return fw_swapaxes(tmp_output, -1, dim, fw) + + +def set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, scale_tolerance, max_numerator, eps, fw): + # eventually we must have both scale-factors and out-sizes for all in/out + # dims. 
however, we support many possible partial arguments + if scale_factors is None and out_shape is None: + raise ValueError("either scale_factors or out_shape should be " + "provided") + if out_shape is not None: + # if out_shape has less dims than in_shape, we defaultly resize the + # first dims for numpy and last dims for paddle + out_shape = (list(out_shape) + + list(in_shape[len(out_shape):]) if fw is numpy else list(in_shape[:-len(out_shape)]) + + list(out_shape)) + if scale_factors is None: + # if no scale given, we calculate it as the out to in ratio + # (not recomended) + scale_factors = [out_sz / in_sz for out_sz, in_sz in zip(out_shape, in_shape)] + if scale_factors is not None: + # by default, if a single number is given as scale, we assume resizing + # two dims (most common are images with 2 spatial dims) + scale_factors = (scale_factors if isinstance(scale_factors, (list, tuple)) else [scale_factors, scale_factors]) + # if less scale_factors than in_shape dims, we defaultly resize the + # first dims for numpy and last dims for paddle + scale_factors = (list(scale_factors) + [1] * (len(in_shape) - len(scale_factors)) if fw is numpy else [1] * + (len(in_shape) - len(scale_factors)) + list(scale_factors)) + if out_shape is None: + # when no out_shape given, it is calculated by multiplying the + # scale by the in_shape (not recomended) + out_shape = [ceil(scale_factor * in_sz) for scale_factor, in_sz in zip(scale_factors, in_shape)] + # next part intentionally after out_shape determined for stability + # we fix by_convs to be a list of truth values in case it is not + if not isinstance(by_convs, (list, tuple)): + by_convs = [by_convs] * len(out_shape) + + # next loop fixes the scale for each dim to be either frac or float. + # this is determined by by_convs and by tolerance for scale accuracy. + for ind, (sf, dim_by_convs) in enumerate(zip(scale_factors, by_convs)): + # first we fractionaize + if dim_by_convs: + frac = Fraction(1 / sf).limit_denominator(max_numerator) + frac = Fraction(numerator=frac.denominator, denominator=frac.numerator) + + # if accuracy is within tolerance scale will be frac. if not, then + # it will be float and the by_convs attr will be set false for + # this dim + if scale_tolerance is None: + scale_tolerance = eps + if dim_by_convs and abs(frac - sf) < scale_tolerance: + scale_factors[ind] = frac + else: + scale_factors[ind] = float(sf) + by_convs[ind] = False + + return scale_factors, out_shape, by_convs + + +def apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, antialiasing): + # antialiasing is "stretching" the field of view according to the scale + # factor (only for downscaling). this is low-pass filtering. this + # requires modifying both the interpolation (stretching the 1d + # function and multiplying by the scale-factor) and the window size. 
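+ # As a hypothetical illustration: with the cubic kernel (support_sz=4) and a 2x downscale
+ # (scale_factor=0.5), the window becomes 4 / 0.5 = 8 input pixels wide and the kernel is
+ # evaluated as 0.5 * cubic(0.5 * x), i.e. stretched to twice its width (a stronger low-pass
+ # filter) and scaled down so its area is preserved.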
+ scale_factor = float(scale_factor) + if scale_factor >= 1.0 or not antialiasing: + return interp_method, support_sz + cur_interp_method = (lambda arg: scale_factor * interp_method(scale_factor * arg)) + cur_support_sz = support_sz / scale_factor + return cur_interp_method, cur_support_sz + + +def fw_ceil(x, fw): + if fw is numpy: + return fw.int_(fw.ceil(x)) + else: + return paddle.cast(x.ceil(), dtype='int64') + + +def fw_floor(x, fw): + if fw is numpy: + return fw.int_(fw.floor(x)) + else: + return paddle.cast(x.floor(), dtype='int64') + + +def fw_cat(x, fw): + if fw is numpy: + return fw.concatenate(x) + else: + return fw.concat(x) + + +def fw_swapaxes(x, ax_1, ax_2, fw): + if fw is numpy: + return fw.swapaxes(x, ax_1, ax_2) + else: + if ax_1 == -1: + ax_1 = len(x.shape) - 1 + if ax_2 == -1: + ax_2 = len(x.shape) - 1 + perm0 = list(range(len(x.shape))) + temp = ax_1 + perm0[temp] = ax_2 + perm0[ax_2] = temp + return fw.transpose(x, perm0) + + +def fw_pad(x, fw, pad_sz, pad_mode, dim=0): + if pad_sz == (0, 0): + return x + if fw is numpy: + pad_vec = [(0, 0)] * x.ndim + pad_vec[dim] = pad_sz + return fw.pad(x, pad_width=pad_vec, mode=pad_mode) + else: + if x.ndim < 3: + x = x[None, None, ...] + + pad_vec = [0] * ((x.ndim - 2) * 2) + pad_vec[0:2] = pad_sz + return fw_swapaxes(fw.nn.functional.pad(fw_swapaxes(x, dim, -1, fw), pad=pad_vec, mode=pad_mode), dim, -1, fw) + + +def fw_conv(input, filter, stride): + # we want to apply 1d conv to any nd array. the way to do it is to reshape + # the input to a 4D tensor. first two dims are singeletons, 3rd dim stores + # all the spatial dims that we are not convolving along now. then we can + # apply conv2d with a 1xK filter. This convolves the same way all the other + # dims stored in the 3d dim. like depthwise conv over these. + # TODO: numpy support + reshaped_input = input.reshape(1, 1, -1, input.shape[-1]) + reshaped_output = paddle.nn.functional.conv2d(reshaped_input, filter.view(1, 1, 1, -1), stride=(1, stride)) + return reshaped_output.reshape(*input.shape[:-1], -1) + + +def fw_arange(upper_bound, fw, device): + if fw is numpy: + return fw.arange(upper_bound) + else: + return fw.arange(upper_bound) + + +def fw_empty(shape, fw, device): + if fw is numpy: + return fw.empty(shape) + else: + return fw.empty(shape=shape) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/README.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/README.md new file mode 100644 index 000000000..711671bad --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/README.md @@ -0,0 +1,2 @@ +# Diffusion model (Paddle) +This module implements diffusion model which accepts a text prompt and outputs images semantically close to the text. The code is rewritten by Paddle, and mainly refer to two projects: jina-ai/discoart[https://github.com/jina-ai/discoart] and openai/guided-diffusion[https://github.com/openai/guided-diffusion]. Thanks for their wonderful work. 
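+
+A minimal usage sketch (the import path below is an assumption; inside PaddleHub this package is normally driven through the parent module rather than imported directly):
+
+```python
+from reverse_diffusion import create
+
+# Returns a DocumentArray with `n_batches` Documents; `create` loads the diffusion
+# and CLIP models, then runs CLIP-guided diffusion for the given prompt.
+da = create(text_prompts=['A beautiful painting of a lighthouse, Trending on artstation.'],
+            width_height=[1280, 768],
+            steps=250,
+            clip_guidance_scale=5000,
+            n_batches=1,
+            output_dir='discoart_output')
+```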
diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/__init__.py new file mode 100755 index 000000000..39fc908dc --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/__init__.py @@ -0,0 +1,156 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/__init__.py +''' +import os +import warnings + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + +__all__ = ['create'] + +import sys + +__resources_path__ = os.path.join( + os.path.dirname(sys.modules.get(__package__).__file__ if __package__ in sys.modules else __file__), + 'resources', +) + +import gc + +# check if GPU is available +import paddle + +# download and load models, this will take some time on the first load + +from .helper import load_all_models, load_diffusion_model, load_clip_models + +model_config, secondary_model = load_all_models('512x512_diffusion_uncond_finetune_008100', use_secondary_model=True) + +from typing import TYPE_CHECKING, overload, List, Optional + +if TYPE_CHECKING: + from docarray import DocumentArray, Document + +_clip_models_cache = {} + +# begin_create_overload + + +@overload +def create(text_prompts: Optional[List[str]] = [ + 'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.', + 'yellow color scheme', +], + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 10, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 150, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_model: Optional[str] = '512x512_diffusion_uncond_finetune_008100', + use_secondary_model: Optional[bool] = True, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 4, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + clip_models: Optional[list] = ['ViTB32', 'ViTB16', 'RN50'], + output_dir: Optional[str] = 'discoart_output') -> 'DocumentArray': + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. 
Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. 
Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_model: Diffusion_model of choice. + :param use_secondary_model: Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. 
This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param clip_models: CLIP Model selectors. ViTB32, ViTB16, ViTL14, RN101, RN50, RN50x4, RN50x16, RN50x64.These various CLIP models are available for you to use during image generation. 
Models have different styles or ‘flavors,’ so look around. You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash. The rough order of speed/mem usage is (smallest/fastest to largest/slowest): ViTB32, RN50, RN101, ViTB16, RN50x4, RN50x16, RN50x64, ViTL14. For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +# end_create_overload + + +@overload +def create(init_document: 'Document') -> 'DocumentArray': + """ + Create an artwork using a DocArray ``Document`` object as initial state. + :param init_document: its ``.tags`` will be used as parameters, ``.uri`` (if present) will be used as init image. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +def create(**kwargs) -> 'DocumentArray': + from .config import load_config + from .runner import do_run + + if 'init_document' in kwargs: + d = kwargs['init_document'] + _kwargs = d.tags + if not _kwargs: + warnings.warn('init_document has no .tags, fallback to default config') + if d.uri: + _kwargs['init_image'] = kwargs['init_document'].uri + else: + warnings.warn('init_document has no .uri, fallback to no init image') + kwargs.pop('init_document') + if kwargs: + warnings.warn('init_document has .tags and .uri, but kwargs are also present, will override .tags') + _kwargs.update(kwargs) + _args = load_config(user_config=_kwargs) + else: + _args = load_config(user_config=kwargs) + + model, diffusion = load_diffusion_model(model_config, _args.diffusion_model, steps=_args.steps) + + clip_models = load_clip_models(enabled=_args.clip_models, clip_models=_clip_models_cache) + + gc.collect() + paddle.device.cuda.empty_cache() + try: + return do_run(_args, (model, diffusion, clip_models, secondary_model)) + except KeyboardInterrupt: + pass diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/config.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/config.py new file mode 100755 index 000000000..0cbc71e6f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/config.py @@ -0,0 +1,77 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/config.py +''' +import copy +import random +import warnings +from types import SimpleNamespace +from typing import Dict + +import yaml +from yaml import Loader + +from .
import __resources_path__ + +with open(f'{__resources_path__}/default.yml') as ymlfile: + default_args = yaml.load(ymlfile, Loader=Loader) + + +def load_config(user_config: Dict): + cfg = copy.deepcopy(default_args) + + if user_config: + cfg.update(**user_config) + + for k in user_config.keys(): + if k not in default_args: + warnings.warn(f'unknown argument {k}, ignored') + + for k, v in cfg.items(): + if k in ('batch_size', 'display_rate', 'seed', 'skip_steps', 'steps', 'n_batches', + 'cutn_batches') and isinstance(v, float): + cfg[k] = int(v) + if k == 'width_height': + cfg[k] = [int(vv) for vv in v] + + cfg.update(**{ + 'seed': cfg['seed'] or random.randint(0, 2**32), + }) + + if cfg['batch_name']: + da_name = f'{__package__}-{cfg["batch_name"]}-{cfg["seed"]}' + else: + da_name = f'{__package__}-{cfg["seed"]}' + warnings.warn('you did not set `batch_name`, set it to have a unique session ID') + + cfg.update(**{'name_docarray': da_name}) + + print_args_table(cfg) + + return SimpleNamespace(**cfg) + + +def print_args_table(cfg): + from rich.table import Table + from rich import box + from rich.console import Console + + console = Console() + + param_str = Table( + title=cfg['name_docarray'], + box=box.ROUNDED, + highlight=True, + title_justify='left', + ) + param_str.add_column('Argument', justify='right') + param_str.add_column('Value', justify='left') + + for k, v in sorted(cfg.items()): + value = str(v) + + if default_args.get(k, None) != v: + value = f'[b]{value}[/]' + + param_str.add_row(k, value) + + console.print(param_str) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/helper.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/helper.py new file mode 100755 index 000000000..6fc4196be --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/helper.py @@ -0,0 +1,137 @@ +''' +This code is rewritten in Paddle based on Jina-ai/discoart.
+https://github.com/jina-ai/discoart/blob/main/discoart/helper.py +''' +import hashlib +import logging +import os +import subprocess +import sys +from os.path import expanduser +from pathlib import Path +from typing import Any +from typing import Dict +from typing import List + +import paddle + + +def _get_logger(): + logger = logging.getLogger(__package__) + logger.setLevel("INFO") + ch = logging.StreamHandler() + ch.setLevel("INFO") + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + logger.addHandler(ch) + return logger + + +logger = _get_logger() + + +def load_clip_models(enabled: List[str], clip_models: Dict[str, Any] = {}): + + import disco_diffusion_clip_vitb32.clip.clip as clip + from disco_diffusion_clip_vitb32.clip.clip import build_model, tokenize, transform + + # load enabled models + for k in enabled: + if k not in clip_models: + clip_models[k] = build_model(name=k) + clip_models[k].eval() + for parameter in clip_models[k].parameters(): + parameter.stop_gradient = True + + # disable not enabled models to save memory + for k in clip_models: + if k not in enabled: + clip_models.pop(k) + + return list(clip_models.values()) + + +def load_all_models(diffusion_model, use_secondary_model): + from .model.script_util import ( + model_and_diffusion_defaults, ) + + model_config = model_and_diffusion_defaults() + + if diffusion_model == '512x512_diffusion_uncond_finetune_008100': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later. + 'image_size': 512, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + elif diffusion_model == '256x256_diffusion_uncond': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later. 
+ 'image_size': 256, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + + secondary_model = None + if use_secondary_model: + from .model.sec_diff import SecondaryDiffusionImageNet2 + secondary_model = SecondaryDiffusionImageNet2() + model_dict = paddle.load( + os.path.join(os.path.dirname(__file__), 'pre_trained', 'secondary_model_imagenet_2.pdparams')) + secondary_model.set_state_dict(model_dict) + secondary_model.eval() + for parameter in secondary_model.parameters(): + parameter.stop_gradient = True + + return model_config, secondary_model + + +def load_diffusion_model(model_config, diffusion_model, steps): + from .model.script_util import ( + create_model_and_diffusion, ) + + timestep_respacing = f'ddim{steps}' + diffusion_steps = (1000 // steps) * steps if steps < 1000 else steps + model_config.update({ + 'timestep_respacing': timestep_respacing, + 'diffusion_steps': diffusion_steps, + }) + + model, diffusion = create_model_and_diffusion(**model_config) + model.set_state_dict( + paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', f'{diffusion_model}.pdparams'))) + model.eval() + for name, param in model.named_parameters(): + param.stop_gradient = True + + return model, diffusion + + +def parse_prompt(prompt): + if prompt.startswith('http://') or prompt.startswith('https://'): + vals = prompt.rsplit(':', 2) + vals = [vals[0] + ':' + vals[1], *vals[2:]] + else: + vals = prompt.rsplit(':', 1) + vals = vals + ['', '1'][len(vals):] + return vals[0], float(vals[1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/__init__.py new file mode 100755 index 000000000..466800666 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/__init__.py @@ -0,0 +1,3 @@ +""" +Codebase for "Improved Denoising Diffusion Probabilistic Models" implemented by Paddle. +""" diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/gaussian_diffusion.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/gaussian_diffusion.py new file mode 100755 index 000000000..86cd2c650 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/gaussian_diffusion.py @@ -0,0 +1,1214 @@ +""" +Diffusion model implemented by Paddle. +This code is rewritten based on Pytorch version of of Ho et al's diffusion models: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py +""" +import enum +import math + +import numpy as np +import paddle + +from .losses import discretized_gaussian_log_likelihood +from .losses import normal_kl +from .nn import mean_flat + + +def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): + """ + Get a pre-defined beta schedule for the given name. + + The beta schedule library consists of beta schedules which remain similar + in the limit of num_diffusion_timesteps. + Beta schedules may be added, but should not be removed or changed once + they are committed to maintain backwards compatibility. + """ + if schedule_name == "linear": + # Linear schedule from Ho et al, extended to work for any number of + # diffusion steps. 
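+ # For example (illustrative numbers only): num_diffusion_timesteps=250 gives scale=4,
+ # so betas run linearly from 4*0.0001=0.0004 up to 4*0.02=0.08; the sum of betas
+ # (~10.05) matches the original 1000-step schedule, which is what keeps the shortened
+ # schedule behaving comparably.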
+ scale = 1000 / num_diffusion_timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + elif schedule_name == "cosine": + return betas_for_alpha_bar( + num_diffusion_timesteps, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2)**2, + ) + else: + raise NotImplementedError(f"unknown beta schedule: {schedule_name}") + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +class ModelMeanType(enum.Enum): + """ + Which type of output the model predicts. + """ + + PREVIOUS_X = enum.auto() # the model predicts x_{t-1} + START_X = enum.auto() # the model predicts x_0 + EPSILON = enum.auto() # the model predicts epsilon + + +class ModelVarType(enum.Enum): + """ + What is used as the model's output variance. + + The LEARNED_RANGE option has been added to allow the model to predict + values between FIXED_SMALL and FIXED_LARGE, making its job easier. + """ + + LEARNED = enum.auto() + FIXED_SMALL = enum.auto() + FIXED_LARGE = enum.auto() + LEARNED_RANGE = enum.auto() + + +class LossType(enum.Enum): + MSE = enum.auto() # use raw MSE loss (and KL when learning variances) + RESCALED_MSE = (enum.auto()) # use raw MSE loss (with RESCALED_KL when learning variances) + KL = enum.auto() # use the variational lower-bound + RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB + + def is_vb(self): + return self == LossType.KL or self == LossType.RESCALED_KL + + +class GaussianDiffusion: + """ + Utilities for training and sampling diffusion models. + + Ported directly from here, and then adapted over time to further experimentation. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 + + :param betas: a 1-D numpy array of betas for each diffusion timestep, + starting at T and going to 1. + :param model_mean_type: a ModelMeanType determining what the model outputs. + :param model_var_type: a ModelVarType determining how variance is output. + :param loss_type: a LossType determining the loss function to use. + :param rescale_timesteps: if True, pass floating point timesteps into the + model so that they are always scaled like in the + original paper (0 to 1000). + """ + + def __init__( + self, + *, + betas, + model_mean_type, + model_var_type, + loss_type, + rescale_timesteps=False, + ): + self.model_mean_type = model_mean_type + self.model_var_type = model_var_type + self.loss_type = loss_type + self.rescale_timesteps = rescale_timesteps + + # Use float64 for accuracy. 
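+        # All schedule-derived constants below are precomputed once as float64 numpy arrays;
+        # _extract_into_tensor() gathers them for a batch of timesteps at run time.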
+ betas = np.array(betas, dtype=np.float64) + self.betas = betas + assert len(betas.shape) == 1, "betas must be 1-D" + assert (betas > 0).all() and (betas <= 1).all() + + self.num_timesteps = int(betas.shape[0]) + + alphas = 1.0 - betas + self.alphas_cumprod = np.cumprod(alphas, axis=0) + self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) + self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) + assert self.alphas_cumprod_prev.shape == (self.num_timesteps, ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) + self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) + self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) + self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + self.posterior_variance = (betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + # log calculation clipped because the posterior variance is 0 at the + # beginning of the diffusion chain. + self.posterior_log_variance_clipped = np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:])) + self.posterior_mean_coef1 = (betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)) + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def q_sample(self, x_start, t, noise=None): + """ + Diffuse the data for a given number of diffusion steps. + + In other words, sample from q(x_t | x_0). + + :param x_start: the initial data batch. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :param noise: if specified, the split-out normal noise. + :return: A noisy version of x_start. 
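+
+        Concretely, this returns sqrt(alpha_bar_t) * x_start + sqrt(1 - alpha_bar_t) * noise.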
+ """ + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + assert noise.shape == x_start.shape + return (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def q_posterior_mean_variance(self, x_start, x_t, t): + """ + Compute the mean and variance of the diffusion posterior: + + q(x_{t-1} | x_t, x_0) + + """ + assert x_start.shape == x_t.shape + posterior_mean = (_extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t) + posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + assert (posterior_mean.shape[0] == posterior_variance.shape[0] == posterior_log_variance_clipped.shape[0] == + x_start.shape[0]) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None): + """ + Apply the model to get p(x_{t-1} | x_t), as well as a prediction of + the initial x, x_0. + + :param model: the model, which takes a signal and a batch of timesteps + as input. + :param x: the [N x C x ...] tensor at time t. + :param t: a 1-D Tensor of timesteps. + :param clip_denoised: if True, clip the denoised signal into [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. Applies before + clip_denoised. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict with the following keys: + - 'mean': the model mean output. + - 'variance': the model variance output. + - 'log_variance': the log of 'variance'. + - 'pred_xstart': the prediction for x_0. + """ + if model_kwargs is None: + model_kwargs = {} + + B, C = x.shape[:2] + assert t.shape == [B] + model_output = model(x, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]: + assert model_output.shape == [B, C * 2, *x.shape[2:]] + model_output, model_var_values = paddle.split(model_output, 2, axis=1) + if self.model_var_type == ModelVarType.LEARNED: + model_log_variance = model_var_values + model_variance = paddle.exp(model_log_variance) + else: + min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape) + max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) + # The model_var_values is [-1, 1] for [min_var, max_var]. + frac = (model_var_values + 1) / 2 + model_log_variance = frac * max_log + (1 - frac) * min_log + model_variance = paddle.exp(model_log_variance) + else: + model_variance, model_log_variance = { + # for fixedlarge, we set the initial (log-)variance like so + # to get a better decoder log likelihood. 
+ ModelVarType.FIXED_LARGE: ( + np.append(self.posterior_variance[1], self.betas[1:]), + np.log(np.append(self.posterior_variance[1], self.betas[1:])), + ), + ModelVarType.FIXED_SMALL: ( + self.posterior_variance, + self.posterior_log_variance_clipped, + ), + }[self.model_var_type] + model_variance = _extract_into_tensor(model_variance, t, x.shape) + model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape) + + def process_xstart(x): + if denoised_fn is not None: + x = denoised_fn(x) + if clip_denoised: + return x.clamp(-1, 1) + return x + + if self.model_mean_type == ModelMeanType.PREVIOUS_X: + pred_xstart = process_xstart(self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)) + model_mean = model_output + elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: + if self.model_mean_type == ModelMeanType.START_X: + pred_xstart = process_xstart(model_output) + else: + pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)) + model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t) + else: + raise NotImplementedError(self.model_mean_type) + + assert (model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape) + return { + "mean": model_mean, + "variance": model_variance, + "log_variance": model_log_variance, + "pred_xstart": pred_xstart, + } + + def _predict_xstart_from_eps(self, x_t, t, eps): + assert x_t.shape == eps.shape + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps) + + def _predict_xstart_from_xprev(self, x_t, t, xprev): + assert x_t.shape == xprev.shape + return ( # (xprev - coef2*x_t) / coef1 + _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev - + _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t) + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + pred_xstart) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _scale_timesteps(self, t): + if self.rescale_timesteps: + return paddle.cast((t), 'float32') * (1000.0 / self.num_timesteps) + return t + + def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). + """ + gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_mean_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
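+
+        Unlike condition_mean(), cond_fn is called here with the raw (unscaled) timestep and
+        the current p_mean_var dict, so the gradient can depend on the model's own prediction.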
+ """ + gradient = cond_fn(x, t, p_mean_var, **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, self._scale_timesteps(t), **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def condition_score_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, p_mean_var, **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def p_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. + :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"]} + + def p_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. 
+ :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean_with_grad(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"].detach()} + + def p_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model. + + :param model: the model module. + :param shape: the shape of the samples, (N, C, H, W). + :param noise: if specified, the noise from the encoder to sample. + Should be of the same shape as `shape`. + :param clip_denoised: if True, clip x_start predictions to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param device: if specified, the device to create the samples on. + If not specified, use a model parameter's device. + :param progress: if True, show a tqdm progress bar. + :return: a non-differentiable batch of samples. + """ + final = None + for sample in self.p_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def p_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model and yield intermediate samples from + each timestep of diffusion. + + Arguments are the same as p_sample_loop(). + Returns a generator over dicts, where each dict is the return value of + p_sample(). 
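+
+        Timesteps are visited in reverse order (num_timesteps - skip_timesteps - 1 down to 0);
+        when skip_timesteps > 0 the chain starts from a q_sample()-noised init_image
+        (zeros if no init_image is given) rather than from pure noise.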
+ """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + sample_fn = self.p_sample_with_grad if cond_fn_with_grad else self.p_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + ) + yield out + img = out["sample"] + + def ddim_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"]} + + def ddim_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + out["pred_xstart"] = out["pred_xstart"].detach() + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"].detach()} + + def ddim_reverse_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t+1} from the model using DDIM reverse ODE. + """ + assert eta == 0.0, "Reverse ODE only for deterministic path" + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x - + out["pred_xstart"]) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape) + alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape) + + # Equation 12. reversed + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_next) + paddle.sqrt(1 - alpha_bar_next) * eps) + + return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]} + + def ddim_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model using DDIM. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.ddim_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + eta=eta, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def ddim_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Use DDIM to sample from the model and yield intermediate samples from + each timestep of DDIM. + + Same usage as p_sample_loop_progressive(). 
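+
+        Each yielded dict is the return value of ddim_sample() (or ddim_sample_with_grad()
+        when cond_fn_with_grad is set).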
+ """ + # if device is None: + # device = next(model.parameters()).device + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0]) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint( + low=0, + high=model.num_classes, + shape=model_kwargs['y'].shape, + ) + sample_fn = self.ddim_sample_with_grad if cond_fn_with_grad else self.ddim_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + eta=eta, + ) + yield out + img = out["sample"] + + def plms_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + cond_fn_with_grad=False, + order=2, + old_out=None, + ): + """ + Sample x_{t-1} from the model using Pseudo Linear Multistep. + + Same usage as p_sample(). + """ + if not int(order) or not 1 <= order <= 4: + raise ValueError('order is invalid (should be int from 1-4).') + + def get_model_output(x, t): + with paddle.set_grad_enabled(cond_fn_with_grad and cond_fn is not None): + x = x.detach().requires_grad_() if cond_fn_with_grad else x + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + if cond_fn_with_grad: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + x = x.detach() + else: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + return eps, out, out_orig + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + eps, out, out_orig = get_model_output(x, t) + + if order > 1 and old_out is None: + # Pseudo Improved Euler + old_eps = [eps] + mean_pred = out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps + eps_2, _, _ = get_model_output(mean_pred, t - 1) + eps_prime = (eps + eps_2) / 2 + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + else: + # Pseudo Linear Multistep (Adams-Bashforth) + old_eps = old_out["old_eps"] + old_eps.append(eps) + cur_order = min(order, len(old_eps)) + if cur_order == 1: + eps_prime = old_eps[-1] + elif cur_order == 2: + eps_prime = (3 * old_eps[-1] - old_eps[-2]) / 2 + elif cur_order == 3: + eps_prime = (23 * old_eps[-1] - 16 * old_eps[-2] + 5 * old_eps[-3]) / 12 + elif cur_order == 4: + eps_prime = (55 * old_eps[-1] - 59 * old_eps[-2] + 37 * old_eps[-3] - 9 * old_eps[-4]) / 24 + else: + raise RuntimeError('cur_order is invalid.') + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + + if len(old_eps) >= order: + old_eps.pop(0) + + nonzero_mask = paddle.cast((t != 0), 'float32').reshape([-1, *([1] * (len(x.shape) - 1))]) + sample = mean_pred * nonzero_mask + out["pred_xstart"] * (1 - nonzero_mask) + + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"], "old_eps": old_eps} + + def plms_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Generate samples from the model using Pseudo Linear Multistep. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.plms_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + ): + final = sample + return final["sample"] + + def plms_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Use PLMS to sample from the model and yield intermediate samples from each + timestep of PLMS. + + Same usage as p_sample_loop_progressive(). + """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. 
+ from tqdm.auto import tqdm + + indices = tqdm(indices) + + old_out = None + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + out = self.plms_sample( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + old_out=old_out, + ) + yield out + old_out = out + img = out["sample"] + + def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None): + """ + Get a term for the variational lower-bound. + + The resulting units are bits (rather than nats, as one might expect). + This allows for comparison to other papers. + + :return: a dict with the following keys: + - 'output': a shape [N] tensor of NLLs or KLs. + - 'pred_xstart': the x_0 predictions. + """ + true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t) + out = self.p_mean_variance(model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs) + kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]) + kl = mean_flat(kl) / np.log(2.0) + + decoder_nll = -discretized_gaussian_log_likelihood( + x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]) + assert decoder_nll.shape == x_start.shape + decoder_nll = mean_flat(decoder_nll) / np.log(2.0) + + # At the first timestep return the decoder NLL, + # otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)) + output = paddle.where((t == 0), decoder_nll, kl) + return {"output": output, "pred_xstart": out["pred_xstart"]} + + def training_losses(self, model, x_start, t, model_kwargs=None, noise=None): + """ + Compute training losses for a single timestep. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param t: a batch of timestep indices. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param noise: if specified, the specific Gaussian noise to try to remove. + :return: a dict with the key "loss" containing a tensor of shape [N]. + Some mean or variance settings may also have other keys. + """ + if model_kwargs is None: + model_kwargs = {} + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start, t, noise=noise) + + terms = {} + + if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL: + terms["loss"] = self._vb_terms_bpd( + model=model, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + model_kwargs=model_kwargs, + )["output"] + if self.loss_type == LossType.RESCALED_KL: + terms["loss"] *= self.num_timesteps + elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: + model_output = model(x_t, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ + ModelVarType.LEARNED, + ModelVarType.LEARNED_RANGE, + ]: + B, C = x_t.shape[:2] + assert model_output.shape == (B, C * 2, *x_t.shape[2:]) + model_output, model_var_values = paddle.split(model_output, 2, dim=1) + # Learn the variance using the variational bound, but don't let + # it affect our mean prediction. 
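+                # detach() below blocks gradients of the variational bound from reaching the
+                # predicted mean, so the VB term only trains the learned variance channels.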
+ frozen_out = paddle.concat([model_output.detach(), model_var_values], axis=1) + terms["vb"] = self._vb_terms_bpd( + model=lambda *args, r=frozen_out: r, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + )["output"] + if self.loss_type == LossType.RESCALED_MSE: + # Divide by 1000 for equivalence with initial implementation. + # Without a factor of 1/1000, the VB term hurts the MSE term. + terms["vb"] *= self.num_timesteps / 1000.0 + + target = { + ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0], + ModelMeanType.START_X: x_start, + ModelMeanType.EPSILON: noise, + }[self.model_mean_type] + assert model_output.shape == target.shape == x_start.shape + terms["mse"] = mean_flat((target - model_output)**2) + if "vb" in terms: + terms["loss"] = terms["mse"] + terms["vb"] + else: + terms["loss"] = terms["mse"] + else: + raise NotImplementedError(self.loss_type) + + return terms + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + + This term can't be optimized, as it only depends on the encoder. + + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = paddle.to_tensor([self.num_timesteps - 1] * batch_size, place=x_start.place) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None): + """ + Compute the entire variational lower-bound, measured in bits-per-dim, + as well as other related quantities. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param clip_denoised: if True, clip denoised samples. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + + :return: a dict containing the following keys: + - total_bpd: the total variational lower-bound, per batch element. + - prior_bpd: the prior term in the lower-bound. + - vb: an [N x T] tensor of terms in the lower-bound. + - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep. + - mse: an [N x T] tensor of epsilon MSEs for each timestep. 
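+
+        total_bpd equals prior_bpd plus the sum over timesteps of the vb terms.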
+ """ + device = x_start.place + batch_size = x_start.shape[0] + + vb = [] + xstart_mse = [] + mse = [] + for t in list(range(self.num_timesteps))[::-1]: + t_batch = paddle.to_tensor([t] * batch_size, place=device) + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise) + # Calculate VLB term at the current timestep + # with paddle.no_grad(): + out = self._vb_terms_bpd( + model, + x_start=x_start, + x_t=x_t, + t=t_batch, + clip_denoised=clip_denoised, + model_kwargs=model_kwargs, + ) + vb.append(out["output"]) + xstart_mse.append(mean_flat((out["pred_xstart"] - x_start)**2)) + eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"]) + mse.append(mean_flat((eps - noise)**2)) + + vb = paddle.stack(vb, axis=1) + xstart_mse = paddle.stack(xstart_mse, axis=1) + mse = paddle.stack(mse, axis=1) + + prior_bpd = self._prior_bpd(x_start) + total_bpd = vb.sum(axis=1) + prior_bpd + return { + "total_bpd": total_bpd, + "prior_bpd": prior_bpd, + "vb": vb, + "xstart_mse": xstart_mse, + "mse": mse, + } + + +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + res = paddle.to_tensor(arr, place=timesteps.place)[timesteps] + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/losses.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/losses.py new file mode 100755 index 000000000..5c3970de5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/losses.py @@ -0,0 +1,86 @@ +""" +Helpers for various likelihood-based losses implemented by Paddle. These are ported from the original +Ho et al. diffusion models codebase: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py +""" +import numpy as np +import paddle +import paddle.nn.functional as F + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + Compute the KL divergence between two gaussians. + + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, paddle.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for th.exp(). + logvar1, logvar2 = [x if isinstance(x, paddle.Tensor) else paddle.to_tensor(x) for x in (logvar1, logvar2)] + + return 0.5 * (-1.0 + logvar2 - logvar1 + paddle.exp(logvar1 - logvar2) + + ((mean1 - mean2)**2) * paddle.exp(-logvar2)) + + +def approx_standard_normal_cdf(x): + """ + A fast approximation of the cumulative distribution function of the + standard normal. 
+ """ + return 0.5 * (1.0 + paddle.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def discretized_gaussian_log_likelihood(x, *, means, log_scales): + """ + Compute the log-likelihood of a Gaussian distribution discretizing to a + given image. + + :param x: the target images. It is assumed that this was uint8 values, + rescaled to the range [-1, 1]. + :param means: the Gaussian mean Tensor. + :param log_scales: the Gaussian log stddev Tensor. + :return: a tensor like x of log probabilities (in nats). + """ + assert x.shape == means.shape == log_scales.shape + centered_x = x - means + inv_stdv = paddle.exp(-log_scales) + plus_in = inv_stdv * (centered_x + 1.0 / 255.0) + cdf_plus = approx_standard_normal_cdf(plus_in) + min_in = inv_stdv * (centered_x - 1.0 / 255.0) + cdf_min = approx_standard_normal_cdf(min_in) + log_cdf_plus = paddle.log(cdf_plus.clip(min=1e-12)) + log_one_minus_cdf_min = paddle.log((1.0 - cdf_min).clip(min=1e-12)) + cdf_delta = cdf_plus - cdf_min + log_probs = paddle.where( + x < -0.999, + log_cdf_plus, + paddle.where(x > 0.999, log_one_minus_cdf_min, paddle.log(cdf_delta.clip(min=1e-12))), + ) + assert log_probs.shape == x.shape + return log_probs + + +def spherical_dist_loss(x, y): + x = F.normalize(x, axis=-1) + y = F.normalize(y, axis=-1) + return (x - y).norm(axis=-1).divide(paddle.to_tensor(2.0)).asin().pow(2).multiply(paddle.to_tensor(2.0)) + + +def tv_loss(input): + """L2 total variation loss, as in Mahendran et al.""" + input = F.pad(input, (0, 1, 0, 1), 'replicate') + x_diff = input[..., :-1, 1:] - input[..., :-1, :-1] + y_diff = input[..., 1:, :-1] - input[..., :-1, :-1] + return (x_diff**2 + y_diff**2).mean([1, 2, 3]) + + +def range_loss(input): + return (input - input.clip(-1, 1)).pow(2).mean([1, 2, 3]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/make_cutouts.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/make_cutouts.py new file mode 100755 index 000000000..cd46e4bd5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/make_cutouts.py @@ -0,0 +1,177 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/nn/make_cutouts.py +''' +import math + +import paddle +import paddle.nn as nn +from disco_diffusion_clip_vitb32.resize_right.resize_right import resize +from paddle.nn import functional as F + +from . 
import transforms as T + +skip_augs = False # @param{type: 'boolean'} + + +def sinc(x): + return paddle.where(x != 0, paddle.sin(math.pi * x) / (math.pi * x), x.new_ones([])) + + +def lanczos(x, a): + cond = paddle.logical_and(-a < x, x < a) + out = paddle.where(cond, sinc(x) * sinc(x / a), x.new_zeros([])) + return out / out.sum() + + +def ramp(ratio, width): + n = math.ceil(width / ratio + 1) + out = paddle.empty([n]) + cur = 0 + for i in range(out.shape[0]): + out[i] = cur + cur += ratio + return paddle.concat([-out[1:].flip([0]), out])[1:-1] + + +class MakeCutouts(nn.Layer): + + def __init__(self, cut_size, cutn, skip_augs=False): + super().__init__() + self.cut_size = cut_size + self.cutn = cutn + self.skip_augs = skip_augs + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine(degrees=15, translate=(0.1, 0.1)), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomPerspective(distortion_scale=0.4, p=0.7), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.15), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + input = T.Pad(input.shape[2] // 4, fill=0)(input) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + + cutouts = [] + for ch in range(self.cutn): + if ch > self.cutn - self.cutn // 4: + cutout = input.clone() + else: + size = int(max_size * + paddle.zeros(1, ).normal_(mean=0.8, std=0.3).clip(float(self.cut_size / max_size), 1.0)) + offsetx = paddle.randint(0, abs(sideX - size + 1), ()) + offsety = paddle.randint(0, abs(sideY - size + 1), ()) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + + if not self.skip_augs: + cutout = self.augs(cutout) + cutouts.append(resample(cutout, (self.cut_size, self.cut_size))) + del cutout + + cutouts = paddle.concat(cutouts, axis=0) + return cutouts + + +class MakeCutoutsDango(nn.Layer): + + def __init__(self, cut_size, Overview=4, InnerCrop=0, IC_Size_Pow=0.5, IC_Grey_P=0.2): + super().__init__() + self.cut_size = cut_size + self.Overview = Overview + self.InnerCrop = InnerCrop + self.IC_Size_Pow = IC_Size_Pow + self.IC_Grey_P = IC_Grey_P + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine( + degrees=10, + translate=(0.05, 0.05), + interpolation=T.InterpolationMode.BILINEAR, + ), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.1), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + cutouts = [] + gray = T.Grayscale(3) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + output_shape = [1, 3, self.cut_size, self.cut_size] + pad_input = F.pad( + input, + ( + (sideY - max_size) // 2, + (sideY - max_size) // 2, + (sideX - max_size) // 2, + (sideX - max_size) // 2, + ), + **padargs, + ) + cutout = resize(pad_input, out_shape=output_shape) + + if self.Overview > 0: + if self.Overview <= 4: + if self.Overview >= 1: + cutouts.append(cutout) + if self.Overview >= 2: + cutouts.append(gray(cutout)) + if self.Overview >= 3: + cutouts.append(cutout[:, :, :, ::-1]) + if self.Overview == 4: + cutouts.append(gray(cutout[:, :, :, ::-1])) + else: + cutout = resize(pad_input, out_shape=output_shape) + 
for _ in range(self.Overview): + cutouts.append(cutout) + + if self.InnerCrop > 0: + for i in range(self.InnerCrop): + size = int(paddle.rand([1])**self.IC_Size_Pow * (max_size - min_size) + min_size) + offsetx = paddle.randint(0, sideX - size + 1) + offsety = paddle.randint(0, sideY - size + 1) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + if i <= int(self.IC_Grey_P * self.InnerCrop): + cutout = gray(cutout) + cutout = resize(cutout, out_shape=output_shape) + cutouts.append(cutout) + + cutouts = paddle.concat(cutouts) + if skip_augs is not True: + cutouts = self.augs(cutouts) + return cutouts + + +def resample(input, size, align_corners=True): + n, c, h, w = input.shape + dh, dw = size + + input = input.reshape([n * c, 1, h, w]) + + if dh < h: + kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype) + pad_h = (kernel_h.shape[0] - 1) // 2 + input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect') + input = F.conv2d(input, kernel_h[None, None, :, None]) + + if dw < w: + kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype) + pad_w = (kernel_w.shape[0] - 1) // 2 + input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect') + input = F.conv2d(input, kernel_w[None, None, None, :]) + + input = input.reshape([n, c, h, w]) + return F.interpolate(input, size, mode='bicubic', align_corners=align_corners) + + +padargs = {} diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/nn.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/nn.py new file mode 100755 index 000000000..d618183e2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/nn.py @@ -0,0 +1,127 @@ +""" +Various utilities for neural networks implemented by Paddle. This code is rewritten based on: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py +""" +import math + +import paddle +import paddle.nn as nn + + +class SiLU(nn.Layer): + + def forward(self, x): + return x * nn.functional.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + + def forward(self, x): + return super().forward(x) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1D(*args, **kwargs) + elif dims == 2: + return nn.Conv2D(*args, **kwargs) + elif dims == 3: + return nn.Conv3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1D(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2D(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
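+
+    Typically used (as in the guided-diffusion UNet this code is ported from) to
+    zero-initialise the final convolution of a block so that it initially contributes nothing.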
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(axis=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = paddle.exp(-math.log(max_period) * paddle.arange(start=0, end=half, dtype=paddle.float32) / half) + args = paddle.cast(timesteps[:, None], 'float32') * freqs[None] + embedding = paddle.concat([paddle.cos(args), paddle.sin(args)], axis=-1) + if dim % 2: + embedding = paddle.concat([embedding, paddle.zeros_like(embedding[:, :1])], axis=-1) + return embedding + + +def checkpoint(func, inputs, params, flag): + """ + This function is disabled. And now just forward. + """ + return func(*inputs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/perlin_noises.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/perlin_noises.py new file mode 100755 index 000000000..6dacb331b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/perlin_noises.py @@ -0,0 +1,78 @@ +''' +Perlin noise implementation by Paddle. 
+This code is rewritten based on: +https://github.com/jina-ai/discoart/blob/main/discoart/nn/perlin_noises.py +''' +import numpy as np +import paddle +import paddle.vision.transforms as TF +from PIL import Image +from PIL import ImageOps + + +def interp(t): + return 3 * t**2 - 2 * t**3 + + +def perlin(width, height, scale=10): + gx, gy = paddle.randn([2, width + 1, height + 1, 1, 1]) + xs = paddle.linspace(0, 1, scale + 1)[:-1, None] + ys = paddle.linspace(0, 1, scale + 1)[None, :-1] + wx = 1 - interp(xs) + wy = 1 - interp(ys) + dots = 0 + dots += wx * wy * (gx[:-1, :-1] * xs + gy[:-1, :-1] * ys) + dots += (1 - wx) * wy * (-gx[1:, :-1] * (1 - xs) + gy[1:, :-1] * ys) + dots += wx * (1 - wy) * (gx[:-1, 1:] * xs - gy[:-1, 1:] * (1 - ys)) + dots += (1 - wx) * (1 - wy) * (-gx[1:, 1:] * (1 - xs) - gy[1:, 1:] * (1 - ys)) + return dots.transpose([0, 2, 1, 3]).reshape([width * scale, height * scale]) + + +def perlin_ms(octaves, width, height, grayscale): + out_array = [0.5] if grayscale else [0.5, 0.5, 0.5] + # out_array = [0.0] if grayscale else [0.0, 0.0, 0.0] + for i in range(1 if grayscale else 3): + scale = 2**len(octaves) + oct_width = width + oct_height = height + for oct in octaves: + p = perlin(oct_width, oct_height, scale) + out_array[i] += p * oct + scale //= 2 + oct_width *= 2 + oct_height *= 2 + return paddle.concat(out_array) + + +def create_perlin_noise(octaves, width, height, grayscale, side_y, side_x): + out = perlin_ms(octaves, width, height, grayscale) + if grayscale: + out = TF.resize(size=(side_y, side_x), img=out.numpy()) + out = np.uint8(out) + out = Image.fromarray(out).convert('RGB') + else: + out = out.reshape([-1, 3, out.shape[0] // 3, out.shape[1]]) + out = out.squeeze().transpose([1, 2, 0]).numpy() + out = TF.resize(size=(side_y, side_x), img=out) + out = out.clip(0, 1) * 255 + out = np.uint8(out) + out = Image.fromarray(out) + + out = ImageOps.autocontrast(out) + return out + + +def regen_perlin(perlin_mode, side_y, side_x, batch_size): + if perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + + init = (TF.to_tensor(init).add(TF.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + return init.expand([batch_size, -1, -1, -1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/respace.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/respace.py new file mode 100755 index 000000000..c001c70d0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/respace.py @@ -0,0 +1,123 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/respace.py +''' +import numpy as np +import paddle + +from .gaussian_diffusion import GaussianDiffusion + + +def space_timesteps(num_timesteps, section_counts): + """ + Create a list of timesteps to use from an original diffusion process, + given the number of timesteps we 
want to take from equally-sized portions + of the original process. + + For example, if there's 300 timesteps and the section counts are [10,15,20] + then the first 100 timesteps are strided to be 10 timesteps, the second 100 + are strided to be 15 timesteps, and the final 100 are strided to be 20. + + If the stride is a string starting with "ddim", then the fixed striding + from the DDIM paper is used, and only one section is allowed. + + :param num_timesteps: the number of diffusion steps in the original + process to divide up. + :param section_counts: either a list of numbers, or a string containing + comma-separated numbers, indicating the step count + per section. As a special case, use "ddimN" where N + is a number of steps to use the striding from the + DDIM paper. + :return: a set of diffusion steps from the original process to use. + """ + if isinstance(section_counts, str): + if section_counts.startswith("ddim"): + desired_count = int(section_counts[len("ddim"):]) + for i in range(1, num_timesteps): + if len(range(0, num_timesteps, i)) == desired_count: + return set(range(0, num_timesteps, i)) + raise ValueError(f"cannot create exactly {num_timesteps} steps with an integer stride") + section_counts = [int(x) for x in section_counts.split(",")] + size_per = num_timesteps // len(section_counts) + extra = num_timesteps % len(section_counts) + start_idx = 0 + all_steps = [] + for i, section_count in enumerate(section_counts): + size = size_per + (1 if i < extra else 0) + if size < section_count: + raise ValueError(f"cannot divide section of {size} steps into {section_count}") + if section_count <= 1: + frac_stride = 1 + else: + frac_stride = (size - 1) / (section_count - 1) + cur_idx = 0.0 + taken_steps = [] + for _ in range(section_count): + taken_steps.append(start_idx + round(cur_idx)) + cur_idx += frac_stride + all_steps += taken_steps + start_idx += size + return set(all_steps) + + +class SpacedDiffusion(GaussianDiffusion): + """ + A diffusion process which can skip steps in a base diffusion process. + + :param use_timesteps: a collection (sequence or set) of timesteps from the + original diffusion process to retain. + :param kwargs: the kwargs to create the base diffusion process. 
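+
+    Internally, the betas are recomputed so that the retained subsequence of alphas_cumprod
+    is reproduced exactly, and model timesteps are remapped onto the original schedule via
+    _WrappedModel.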
+ """ + + def __init__(self, use_timesteps, **kwargs): + self.use_timesteps = set(use_timesteps) + self.timestep_map = [] + self.original_num_steps = len(kwargs["betas"]) + + base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa + last_alpha_cumprod = 1.0 + new_betas = [] + for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): + if i in self.use_timesteps: + new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) + last_alpha_cumprod = alpha_cumprod + self.timestep_map.append(i) + kwargs["betas"] = np.array(new_betas) + super().__init__(**kwargs) + + def p_mean_variance(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) + + def training_losses(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().training_losses(self._wrap_model(model), *args, **kwargs) + + def condition_mean(self, cond_fn, *args, **kwargs): + return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) + + def condition_score(self, cond_fn, *args, **kwargs): + return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) + + def _wrap_model(self, model): + if isinstance(model, _WrappedModel): + return model + return _WrappedModel(model, self.timestep_map, self.rescale_timesteps, self.original_num_steps) + + def _scale_timesteps(self, t): + # Scaling is done by the wrapped model. + return t + + +class _WrappedModel: + + def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): + self.model = model + self.timestep_map = timestep_map + self.rescale_timesteps = rescale_timesteps + self.original_num_steps = original_num_steps + + def __call__(self, x, ts, **kwargs): + map_tensor = paddle.to_tensor(self.timestep_map, place=ts.place, dtype=ts.dtype) + new_ts = map_tensor[ts] + if self.rescale_timesteps: + new_ts = paddle.cast(new_ts, 'float32') * (1000.0 / self.original_num_steps) + return self.model(x, new_ts, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/script_util.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/script_util.py new file mode 100755 index 000000000..d728a5430 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/script_util.py @@ -0,0 +1,201 @@ +''' +This code is based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/script_util.py +''' +import argparse +import inspect + +from . import gaussian_diffusion as gd +from .respace import space_timesteps +from .respace import SpacedDiffusion +from .unet import EncoderUNetModel +from .unet import SuperResModel +from .unet import UNetModel + +NUM_CLASSES = 1000 + + +def diffusion_defaults(): + """ + Defaults for image and classifier training. + """ + return dict( + learn_sigma=False, + diffusion_steps=1000, + noise_schedule="linear", + timestep_respacing="", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + ) + + +def model_and_diffusion_defaults(): + """ + Defaults for image training. 
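+
+    The returned dict also merges in diffusion_defaults(), so it can be unpacked
+    directly into create_model_and_diffusion(**defaults).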
+ """ + res = dict( + image_size=64, + num_channels=128, + num_res_blocks=2, + num_heads=4, + num_heads_upsample=-1, + num_head_channels=-1, + attention_resolutions="16,8", + channel_mult="", + dropout=0.0, + class_cond=False, + use_checkpoint=False, + use_scale_shift_norm=True, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, + ) + res.update(diffusion_defaults()) + return res + + +def create_model_and_diffusion( + image_size, + class_cond, + learn_sigma, + num_channels, + num_res_blocks, + channel_mult, + num_heads, + num_head_channels, + num_heads_upsample, + attention_resolutions, + dropout, + diffusion_steps, + noise_schedule, + timestep_respacing, + use_kl, + predict_xstart, + rescale_timesteps, + rescale_learned_sigmas, + use_checkpoint, + use_scale_shift_norm, + resblock_updown, + use_fp16, + use_new_attention_order, +): + model = create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult=channel_mult, + learn_sigma=learn_sigma, + class_cond=class_cond, + use_checkpoint=use_checkpoint, + attention_resolutions=attention_resolutions, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + resblock_updown=resblock_updown, + use_fp16=use_fp16, + use_new_attention_order=use_new_attention_order, + ) + diffusion = create_gaussian_diffusion( + steps=diffusion_steps, + learn_sigma=learn_sigma, + noise_schedule=noise_schedule, + use_kl=use_kl, + predict_xstart=predict_xstart, + rescale_timesteps=rescale_timesteps, + rescale_learned_sigmas=rescale_learned_sigmas, + timestep_respacing=timestep_respacing, + ) + return model, diffusion + + +def create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult="", + learn_sigma=False, + class_cond=False, + use_checkpoint=False, + attention_resolutions="16", + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + dropout=0, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, +): + if channel_mult == "": + if image_size == 512: + channel_mult = (0.5, 1, 1, 2, 2, 4, 4) + elif image_size == 256: + channel_mult = (1, 1, 2, 2, 4, 4) + elif image_size == 128: + channel_mult = (1, 1, 2, 3, 4) + elif image_size == 64: + channel_mult = (1, 2, 3, 4) + else: + raise ValueError(f"unsupported image size: {image_size}") + else: + channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) + + attention_ds = [] + for res in attention_resolutions.split(","): + attention_ds.append(image_size // int(res)) + + return UNetModel( + image_size=image_size, + in_channels=3, + model_channels=num_channels, + out_channels=(3 if not learn_sigma else 6), + num_res_blocks=num_res_blocks, + attention_resolutions=tuple(attention_ds), + dropout=dropout, + channel_mult=channel_mult, + num_classes=(NUM_CLASSES if class_cond else None), + use_checkpoint=use_checkpoint, + use_fp16=use_fp16, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + resblock_updown=resblock_updown, + use_new_attention_order=use_new_attention_order, + ) + + +def create_gaussian_diffusion( + *, + steps=1000, + learn_sigma=False, + sigma_small=False, + noise_schedule="linear", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + timestep_respacing="", +): + betas = gd.get_named_beta_schedule(noise_schedule, steps) + if 
use_kl: + loss_type = gd.LossType.RESCALED_KL + elif rescale_learned_sigmas: + loss_type = gd.LossType.RESCALED_MSE + else: + loss_type = gd.LossType.MSE + if not timestep_respacing: + timestep_respacing = [steps] + return SpacedDiffusion( + use_timesteps=space_timesteps(steps, timestep_respacing), + betas=betas, + model_mean_type=(gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X), + model_var_type=((gd.ModelVarType.FIXED_LARGE if not sigma_small else gd.ModelVarType.FIXED_SMALL) + if not learn_sigma else gd.ModelVarType.LEARNED_RANGE), + loss_type=loss_type, + rescale_timesteps=rescale_timesteps, + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/sec_diff.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/sec_diff.py new file mode 100755 index 000000000..1e361f18f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/sec_diff.py @@ -0,0 +1,135 @@ +''' +This code is rewritten by Paddle based on +https://github.com/jina-ai/discoart/blob/main/discoart/nn/sec_diff.py +''' +import math +from dataclasses import dataclass +from functools import partial + +import paddle +import paddle.nn as nn + + +@dataclass +class DiffusionOutput: + v: paddle.Tensor + pred: paddle.Tensor + eps: paddle.Tensor + + +class SkipBlock(nn.Layer): + + def __init__(self, main, skip=None): + super().__init__() + self.main = nn.Sequential(*main) + self.skip = skip if skip else nn.Identity() + + def forward(self, input): + return paddle.concat([self.main(input), self.skip(input)], axis=1) + + +def append_dims(x, n): + return x[(Ellipsis, *(None, ) * (n - x.ndim))] + + +def expand_to_planes(x, shape): + return paddle.tile(append_dims(x, len(shape)), [1, 1, *shape[2:]]) + + +def alpha_sigma_to_t(alpha, sigma): + return paddle.atan2(sigma, alpha) * 2 / math.pi + + +def t_to_alpha_sigma(t): + return paddle.cos(t * math.pi / 2), paddle.sin(t * math.pi / 2) + + +class SecondaryDiffusionImageNet2(nn.Layer): + + def __init__(self): + super().__init__() + c = 64 # The base channel count + cs = [c, c * 2, c * 2, c * 4, c * 4, c * 8] + + self.timestep_embed = FourierFeatures(1, 16) + self.down = nn.AvgPool2D(2) + self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + + self.net = nn.Sequential( + ConvBlock(3 + 16, cs[0]), + ConvBlock(cs[0], cs[0]), + SkipBlock([ + self.down, + ConvBlock(cs[0], cs[1]), + ConvBlock(cs[1], cs[1]), + SkipBlock([ + self.down, + ConvBlock(cs[1], cs[2]), + ConvBlock(cs[2], cs[2]), + SkipBlock([ + self.down, + ConvBlock(cs[2], cs[3]), + ConvBlock(cs[3], cs[3]), + SkipBlock([ + self.down, + ConvBlock(cs[3], cs[4]), + ConvBlock(cs[4], cs[4]), + SkipBlock([ + self.down, + ConvBlock(cs[4], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[4]), + self.up, + ]), + ConvBlock(cs[4] * 2, cs[4]), + ConvBlock(cs[4], cs[3]), + self.up, + ]), + ConvBlock(cs[3] * 2, cs[3]), + ConvBlock(cs[3], cs[2]), + self.up, + ]), + ConvBlock(cs[2] * 2, cs[2]), + ConvBlock(cs[2], cs[1]), + self.up, + ]), + ConvBlock(cs[1] * 2, cs[1]), + ConvBlock(cs[1], cs[0]), + self.up, + ]), + ConvBlock(cs[0] * 2, cs[0]), + nn.Conv2D(cs[0], 3, 3, padding=1), + ) + + def forward(self, input, t): + timestep_embed = expand_to_planes(self.timestep_embed(t[:, None]), input.shape) + v = self.net(paddle.concat([input, timestep_embed], axis=1)) + alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t)) + pred = input * alphas - v * 
sigmas + eps = input * sigmas + v * alphas + return DiffusionOutput(v, pred, eps) + + +class FourierFeatures(nn.Layer): + + def __init__(self, in_features, out_features, std=1.0): + super().__init__() + assert out_features % 2 == 0 + # self.weight = nn.Parameter(paddle.randn([out_features // 2, in_features]) * std) + self.weight = paddle.create_parameter([out_features // 2, in_features], + dtype='float32', + default_initializer=nn.initializer.Normal(mean=0.0, std=std)) + + def forward(self, input): + f = 2 * math.pi * input @ self.weight.T + return paddle.concat([f.cos(), f.sin()], axis=-1) + + +class ConvBlock(nn.Sequential): + + def __init__(self, c_in, c_out): + super().__init__( + nn.Conv2D(c_in, c_out, 3, padding=1), + nn.ReLU(), + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/transforms.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/transforms.py new file mode 100755 index 000000000..e0b620b01 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/transforms.py @@ -0,0 +1,757 @@ +''' +This code is rewritten by Paddle based on +https://github.com/pytorch/vision/blob/main/torchvision/transforms/transforms.py +''' +import math +import numbers +import warnings +from enum import Enum +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn.functional import grid_sample +from paddle.vision import transforms as T + + +class Normalize(nn.Layer): + + def __init__(self, mean, std): + super(Normalize, self).__init__() + self.mean = paddle.to_tensor(mean) + self.std = paddle.to_tensor(std) + + def forward(self, tensor: Tensor): + dtype = tensor.dtype + mean = paddle.to_tensor(self.mean, dtype=dtype) + std = paddle.to_tensor(self.std, dtype=dtype) + mean = mean.reshape([1, -1, 1, 1]) + std = std.reshape([1, -1, 1, 1]) + result = tensor.subtract(mean).divide(std) + return result + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. 
+ """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +class Grayscale(nn.Layer): + + def __init__(self, num_output_channels): + super(Grayscale, self).__init__() + self.num_output_channels = num_output_channels + + def forward(self, x): + output = (0.2989 * x[:, 0:1, :, :] + 0.587 * x[:, 1:2, :, :] + 0.114 * x[:, 2:3, :, :]) + if self.num_output_channels == 3: + return output.expand(x.shape) + + return output + + +class Lambda(nn.Layer): + + def __init__(self, func): + super(Lambda, self).__init__() + self.transform = func + + def forward(self, x): + return self.transform(x) + + +class RandomGrayscale(nn.Layer): + + def __init__(self, p): + super(RandomGrayscale, self).__init__() + self.prob = p + self.transform = Grayscale(3) + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return self.transform(x) + else: + return x + + +class RandomHorizontalFlip(nn.Layer): + + def __init__(self, prob): + super(RandomHorizontalFlip, self).__init__() + self.prob = prob + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return x[:, :, :, ::-1] + else: + return x + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 + return (ratio * img1 + (1.0 - ratio) * img2).clip(0, bound) + + +def trunc_div(a, b): + ipt = paddle.divide(a, b) + sign_ipt = paddle.sign(ipt) + abs_ipt = paddle.abs(ipt) + abs_ipt = paddle.floor(abs_ipt) + out = paddle.multiply(sign_ipt, abs_ipt) + return out + + +def fmod(a, b): + return a - trunc_div(a, b) * b + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(axis=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = paddle.max(img, axis=-3) + minc = paddle.min(img, axis=-3) + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = paddle.ones_like(maxc) + s = cr / paddle.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. 
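+    # At most one of the hr/hg/hb terms below contributes per pixel (r takes
+    # priority over g, then b), and fmod wraps the resulting hue into [0, 1).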
+ cr_divisor = paddle.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r).cast('float32') * (bc - gc) + hg = ((maxc == g) & (maxc != r)).cast('float32') * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)).cast('float32') * (4.0 + gc - rc) + h = hr + hg + hb + h = fmod((h / 6.0 + 1.0), paddle.to_tensor(1.0)) + return paddle.stack((h, s, maxc), axis=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(axis=-3) + i = paddle.floor(h * 6.0) + f = (h * 6.0) - i + i = i.cast(dtype='int32') + + p = paddle.clip((v * (1.0 - s)), 0.0, 1.0) + q = paddle.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = paddle.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(axis=-3) == paddle.arange(6).reshape([-1, 1, 1]) + + a1 = paddle.stack((v, q, p, p, t, v), axis=-3) + a2 = paddle.stack((t, v, v, q, p, p), axis=-3) + a3 = paddle.stack((p, p, t, v, v, q), axis=-3) + a4 = paddle.stack((a1, a2, a3), axis=-4) + + return paddle.einsum("...ijk, ...xijk -> ...xjk", mask.cast(dtype=img.dtype), a4) + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + return _blend(img, paddle.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + c = img.shape[1] + + if c == 3: + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + mean = paddle.mean(output, axis=(-3, -2, -1), keepdim=True) + + else: + mean = paddle.mean(img, axis=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + img = _rgb2hsv(img) + h, s, v = img.unbind(axis=-3) + h = fmod(h + hue_factor, paddle.to_tensor(1.0)) + img = paddle.stack((h, s, v), axis=-3) + img_hue_adj = _hsv2rgb(img) + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + + return _blend(img, output, saturation_factor) + + +class ColorJitter(nn.Layer): + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super(ColorJitter, self).__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be 
a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = paddle.randperm(4) + + b = None if brightness is None else paddle.empty([1]).uniform_(brightness[0], brightness[1]) + c = None if contrast is None else paddle.empty([1]).uniform_(contrast[0], contrast[1]) + s = None if saturation is None else paddle.empty([1]).uniform_(saturation[0], saturation[1]) + h = None if hue is None else paddle.empty([1]).uniform_(hue[0], hue[1]) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = adjust_hue(img, hue_factor) + + return img + + def __repr__(self) -> str: + s = (f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})") + return s + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand([img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + img = paddle.concat((img, dummy), axis=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = paddle.to_tensor(fill, dtype=img.dtype).reshape([1, len_fill, 1, 1]).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = paddle.empty([1, oh, ow, 3], dtype=theta.dtype) + x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, num=ow) + base_grid[..., 0] = (x_grid) + y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, num=oh).unsqueeze_(-1) + base_grid[..., 1] = (y_grid) + base_grid[..., 2] = 1.0 + rescaled_theta = theta.transpose([0, 2, 1]) / paddle.to_tensor([0.5 * w, 0.5 * h], dtype=theta.dtype) + output_grid = base_grid.reshape([1, oh * ow, 3]).bmm(rescaled_theta) + return output_grid.reshape([1, oh, ow, 2]) + + +def affine_impl(img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + fill: Optional[List[float]] = None) -> Tensor: + theta = paddle.to_tensor(matrix, dtype=img.dtype).reshape([1, 2, 3]) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_inverse_affine_matrix(center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], + inverted: bool = True) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # Pillow 
requires inverse affine transformation matrix: + # Affine matrix is : M = T * C * RotateScaleShear * C^-1 + # + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RotateScaleShear is rotation with scale and shear matrix + # + # RotateScaleShear(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + if inverted: + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + else: + matrix = [a, b, 0.0, c, d, 0.0] + matrix = [x * scale for x in matrix] + # Apply inverse of center translation: RSS * C^-1 + matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) + matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) + # Apply translation and center : T * C * RSS * C^-1 + matrix[2] += cx + tx + matrix[5] += cy + ty + + return matrix + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, + center: Optional[List[int]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + + Returns: + PIL Image or Tensor: Transformed image. + """ + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + center_f = [0.0, 0.0] + if center is not None: + _, height, width = img.shape[0], img.shape[1], img.shape[2] + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
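+        # e.g. center=(100, 50) on a 200x100 (width x height) image gives center_f=[0.0, 0.0].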
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) + return affine_impl(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +def _check_sequence_input(x, name, req_sizes): + msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + if not isinstance(x, Sequence): + raise TypeError(f"{name} should be a sequence of length {msg}.") + if len(x) not in req_sizes: + raise ValueError(f"{name} should be sequence of length {msg}.") + + +def _setup_angle(x, name, req_sizes=(2, )): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError(f"If {name} is a single number, it must be positive.") + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x] + + +class RandomAffine(nn.Layer): + """Random affine transformation of the image keeping center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. 
warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + center=None, + ): + super(RandomAffine, self).__init__() + if resample is not None: + warnings.warn("The parameter 'resample' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'interpolation' instead.") + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2, )) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2, )) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2, )) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2, )) + + self.center = center + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(paddle.empty([1]).uniform_(float(degrees[0]), float(degrees[1]))) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(float(paddle.empty([1]).uniform_(-max_dx, max_dx))) + ty = int(float(paddle.empty([1]).uniform_(-max_dy, max_dy))) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(paddle.empty([1]).uniform_(scale_ranges[0], scale_ranges[1])) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(paddle.empty([1]).uniform_(shears[0], shears[1])) + if len(shears) == 4: + shear_y = float(paddle.empty([1]).uniform_(shears[2], shears[3])) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + fill = self.fill + channels, height, width = img.shape[1], 
img.shape[2], img.shape[3] + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + else: + fill = [float(f) for f in fill] + + img_size = [width, height] # flip for keeping BC on get_params call + + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) + + return affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(degrees={self.degrees}" + s += f", translate={self.translate}" if self.translate is not None else "" + s += f", scale={self.scale}" if self.scale is not None else "" + s += f", shear={self.shear}" if self.shear is not None else "" + s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else "" + s += f", fill={self.fill}" if self.fill != 0 else "" + s += f", center={self.center}" if self.center is not None else "" + s += ")" + + return s diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/unet.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/unet.py new file mode 100755 index 000000000..56f3ad61e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/model/unet.py @@ -0,0 +1,838 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +''' +import math +from abc import abstractmethod + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from .nn import avg_pool_nd +from .nn import checkpoint +from .nn import conv_nd +from .nn import linear +from .nn import normalization +from .nn import SiLU +from .nn import timestep_embedding +from .nn import zero_module + + +class AttentionPool2d(nn.Layer): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, + spacial_dim: int, + embed_dim: int, + num_heads_channels: int, + output_dim: int = None, + ): + super().__init__() + # self.positional_embedding = nn.Parameter( + # th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5 + # ) + positional_embedding = self.create_parameter(paddle.randn(embed_dim, spacial_dim**2 + 1) / embed_dim**0.5) + self.add_parameter("positional_embedding", positional_embedding) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + # x = x.reshape(b, c, -1) # NC(HW) + x = paddle.reshape(x, [b, c, -1]) + x = paddle.concat([x.mean(dim=-1, keepdim=True), x], axis=-1) # NC(HW+1) + x = x + paddle.cast(self.positional_embedding[None, :, :], x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Layer): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. 
+ """ + + def forward(self, x, emb): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + else: + x = layer(x) + return x + + +class Upsample(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + emb_out = paddle.cast(emb_out, h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = paddle.chunk(emb_out, 2, axis=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x, ), self.parameters(), self.use_checkpoint) + + def _forward(self, x): + b, c, *spatial = x.shape + # x = x.reshape(b, c, -1) + x = paddle.reshape(x, [b, c, -1]) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + # return (x + h).reshape(b, c, *spatial) + return paddle.reshape(x + h, [b, c, *spatial]) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += paddle.to_tensor([matmul_ops], dtype='float64') + + +class QKVAttentionLegacy(nn.Layer): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + # q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + q, k, v = paddle.reshape(qkv, [bs * self.n_heads, ch * 3, length]).split(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Layer): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Layer): + """ + The full UNet model with attention and timestep embedding. + + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + ch = input_ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.LayerList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=int(model_channels * mult), + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(model_channels * mult) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + SiLU(), + zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)), + ) + + def forward(self, x, timesteps, y=None): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == (self.num_classes + is not None), "must specify y if and only if the model is class-conditional" + + hs = [] + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + if self.num_classes is not None: + assert y.shape == (x.shape[0], ) + emb = emb + self.label_emb(y) + + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + hs.append(h) + h = self.middle_block(h, emb) + for module in self.output_blocks: + h = paddle.concat([h, hs.pop()], axis=1) + h = module(h, emb) + # h = paddle.cast(h, x.dtype) + return self.out(h) + + +class SuperResModel(UNetModel): + """ + A UNetModel that performs super-resolution. + + Expects an extra kwarg `low_res` to condition on a low-resolution image. + """ + + def __init__(self, image_size, in_channels, *args, **kwargs): + super().__init__(image_size, in_channels * 2, *args, **kwargs) + + def forward(self, x, timesteps, low_res=None, **kwargs): + _, _, new_height, new_width = x.shape + upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear") + x = paddle.concat([x, upsampled], axis=1) + return super().forward(x, timesteps, **kwargs) + + +class EncoderUNetModel(nn.Layer): + """ + The half UNet model with attention and timestep embedding. + + For usage, see UNet. 
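+
+    The extra ``pool`` argument selects how spatial features are reduced to a single
+    output vector: "adaptive", "attention", "spatial", or "spatial_v2".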
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + SiLU(), + nn.AdaptiveAvgPool2D((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + SiLU(), + AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + 
nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + # h = x.type(self.dtype) + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + # results.append(h.type(x.dtype).mean(axis=(2, 3))) + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = paddle.concat(results, axis=-1) + return self.out(h) + else: + # h = h.type(x.dtype) + h = paddle.cast(h, x.dtype) + return self.out(h) diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/default.yml b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/default.yml new file mode 100755 index 000000000..97c3c1b98 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/default.yml @@ -0,0 +1,47 @@ +text_prompts: + - A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation. + +init_image: + +width_height: [ 1280, 768] + +skip_steps: 10 +steps: 250 + +cut_ic_pow: 1 +init_scale: 1000 +clip_guidance_scale: 5000 + +tv_scale: 0 +range_scale: 150 +sat_scale: 0 +cutn_batches: 4 + +diffusion_model: 512x512_diffusion_uncond_finetune_008100 +use_secondary_model: True +diffusion_sampling_mode: ddim + +perlin_init: False +perlin_mode: mixed +seed: 445467575 +eta: 0.8 +clamp_grad: True +clamp_max: 0.05 + +randomize_class: True +clip_denoised: False +fuzzy_prompt: False +rand_mag: 0.05 + +cut_overview: "[12]*400+[4]*600" +cut_innercut: "[4]*400+[12]*600" +cut_icgray_p: "[0.2]*400+[0]*600" + +display_rate: 10 +n_batches: 1 +batch_size: 1 +batch_name: '' +clip_models: + - VIT + - RN50 + - RN101 diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/docstrings.yml b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/docstrings.yml new file mode 100755 index 000000000..702015e1c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/resources/docstrings.yml @@ -0,0 +1,103 @@ +text_prompts: | + Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. 
+ Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. +init_image: | + Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. + If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. +width_height: | + Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + +skip_steps: | + Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps. + As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases. + The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times. + If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily. + Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems. + Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. + However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + +steps: | + When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step. + Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. 
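As an aside to the width_height entry above: the adjustment it describes is plain integer flooring to a multiple of 64, which is also how the runner later in this patch computes side_x and side_y. A minimal, stand-alone sketch (the helper name is made up for illustration):

```python
def floor_to_64(edge: int) -> int:
    # Mirrors side_x = (args.width_height[0] // 64) * 64 in runner.py:
    # requested edge lengths are floored to the nearest multiple of 64.
    return (edge // 64) * 64

print(floor_to_64(1280), floor_to_64(768))  # 1280 768 -> already valid, unchanged
print(floor_to_64(1000), floor_to_64(700))  # 960 640  -> adjusted down before rendering
```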
Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. + Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + +cut_ic_pow: | + This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +init_scale: | + This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. +clip_guidance_scale: | + CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. + Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. + Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. +tv_scale: | + Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising +range_scale: | + Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + +sat_scale: | + Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. +cutn_batches: | + Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. + Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. + At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. 
+ However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image. + So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + +diffusion_model: Diffusion_model of choice. + +use_secondary_model: | + Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + +diffusion_sampling_mode: | + Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + +perlin_init: | + Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). + Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + +perlin_mode: | + sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. +seed: | + Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. + After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. 
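To make the cut bookkeeping above concrete, here is a small stand-alone sketch of how a schedule string such as "[12]*400+[4]*600" expands and how the scheduled cuts combine with cutn_batches. It only restates the arithmetic described above; the runner in this patch evaluates the schedule strings the same way (via eval) before indexing them per timestep.

```python
# Expand the default schedules the same way runner.py does (eval on the schedule string).
cut_overview = eval("[12]*400+[4]*600")  # 1000 entries, one per diffusion timestep
cut_innercut = eval("[4]*400+[12]*600")

cutn_batches = 4

# Early in the run the schedule favors overview cuts: 12 + 4 = 16 cuts are scheduled,
# so with cutn_batches = 4 DD evaluates 16 * 4 = 64 cuts per timestep, 16 at a time.
early_total = (cut_overview[0] + cut_innercut[0]) * cutn_batches  # 64
# Late in the run the balance flips toward inner cuts; the total here stays 64.
late_total = (cut_overview[-1] + cut_innercut[-1]) * cutn_batches  # 64
print(early_total, late_total)
```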
+eta: |
+    eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results.
+    The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on the image, so you’ll need to experiment to see how this affects your projects.
+clamp_grad: |
+    As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced.
+clamp_max: |
+    Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy.
+
+randomize_class:
+clip_denoised: False
+fuzzy_prompt: |
+    Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this.
+rand_mag: |
+    Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.
+
+cut_overview: The schedule of overview cuts
+cut_innercut: The schedule of inner cuts
+cut_icgray_p: The schedule controlling what portion of the inner cuts is converted to grayscale at each timestep (passed to the cutout generator as IC_Grey_P). It uses the same schedule syntax as cut_overview and cut_innercut.
+
+display_rate: |
+    During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly.
+n_batches: |
+    This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings.
+batch_name: |
+    The name of the batch; the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks being overridden by other users, please use a unique name.
+clip_models: |
+    CLIP Model selectors. ViT-B/32, ViT-B/16, ViT-L/14, RN101, RN50, RN50x4, RN50x16, RN50x64.
+    These various CLIP models are available for you to use during image generation. Models have different styles or ‘flavors,’ so look around.
+    You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.
+ The rough order of speed/mem usage is (smallest/fastest to largest/slowest): + ViT-B/32 + RN50 + RN101 + ViT-B/16 + RN50x4 + RN50x16 + RN50x64 + ViT-L/14 + For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/runner.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/runner.py new file mode 100755 index 000000000..b1e155b06 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/reverse_diffusion/runner.py @@ -0,0 +1,285 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/runner.py +''' +import gc +import os +import random +from threading import Thread + +import disco_diffusion_clip_vitb32.clip.clip as clip +import numpy as np +import paddle +import paddle.vision.transforms as T +import paddle_lpips as lpips +from docarray import Document +from docarray import DocumentArray +from IPython import display +from ipywidgets import Output +from PIL import Image + +from .helper import logger +from .helper import parse_prompt +from .model.losses import range_loss +from .model.losses import spherical_dist_loss +from .model.losses import tv_loss +from .model.make_cutouts import MakeCutoutsDango +from .model.sec_diff import alpha_sigma_to_t +from .model.sec_diff import SecondaryDiffusionImageNet2 +from .model.transforms import Normalize + + +def do_run(args, models) -> 'DocumentArray': + logger.info('preparing models...') + model, diffusion, clip_models, secondary_model = models + normalize = Normalize( + mean=[0.48145466, 0.4578275, 0.40821073], + std=[0.26862954, 0.26130258, 0.27577711], + ) + lpips_model = lpips.LPIPS(net='vgg') + for parameter in lpips_model.parameters(): + parameter.stop_gradient = True + side_x = (args.width_height[0] // 64) * 64 + side_y = (args.width_height[1] // 64) * 64 + cut_overview = eval(args.cut_overview) + cut_innercut = eval(args.cut_innercut) + cut_icgray_p = eval(args.cut_icgray_p) + + from .model.perlin_noises import create_perlin_noise, regen_perlin + + seed = args.seed + + skip_steps = args.skip_steps + + loss_values = [] + + if seed is not None: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + model_stats = [] + for clip_model in clip_models: + model_stat = { + 'clip_model': None, + 'target_embeds': [], + 'make_cutouts': None, + 'weights': [], + } + model_stat['clip_model'] = clip_model + + if isinstance(args.text_prompts, str): + args.text_prompts = [args.text_prompts] + + for prompt in args.text_prompts: + txt, weight = parse_prompt(prompt) + txt = clip_model.encode_text(clip.tokenize(prompt)) + if args.fuzzy_prompt: + for i in range(25): + model_stat['target_embeds'].append((txt + paddle.randn(txt.shape) * args.rand_mag).clip(0, 1)) + model_stat['weights'].append(weight) + else: + model_stat['target_embeds'].append(txt) + model_stat['weights'].append(weight) + + model_stat['target_embeds'] = paddle.concat(model_stat['target_embeds']) + model_stat['weights'] = paddle.to_tensor(model_stat['weights']) + if model_stat['weights'].sum().abs() < 1e-3: + raise RuntimeError('The weights must not sum to 0.') + model_stat['weights'] /= model_stat['weights'].sum().abs() + model_stats.append(model_stat) + + init = None + if args.init_image: + d = Document(uri=args.init_image).load_uri_to_image_tensor(side_x, side_y) + init = T.to_tensor(d.tensor).unsqueeze(0) * 2 - 1 + + if args.perlin_init: + if args.perlin_mode == 'color': 
+ init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif args.perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + init = (T.to_tensor(init).add(T.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + + cur_t = None + + def cond_fn(x, t, y=None): + x_is_NaN = False + n = x.shape[0] + if secondary_model: + alpha = paddle.to_tensor(diffusion.sqrt_alphas_cumprod[cur_t], dtype='float32') + sigma = paddle.to_tensor(diffusion.sqrt_one_minus_alphas_cumprod[cur_t], dtype='float32') + cosine_t = alpha_sigma_to_t(alpha, sigma) + x = paddle.to_tensor(x.detach(), dtype='float32') + x.stop_gradient = False + cosine_t = paddle.tile(paddle.to_tensor(cosine_t.detach().cpu().numpy()), [n]) + cosine_t.stop_gradient = False + out = secondary_model(x, cosine_t).pred + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + else: + t = paddle.ones([n], dtype='int64') * cur_t + out = diffusion.p_mean_variance(model, x, t, clip_denoised=False, model_kwargs={'y': y}) + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out['pred_xstart'] * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + for model_stat in model_stats: + for i in range(args.cutn_batches): + t_int = (int(t.item()) + 1) # errors on last step without +1, need to find source + # when using SLIP Base model the dimensions need to be hard coded to avoid AttributeError: 'VisionTransformer' object has no attribute 'input_resolution' + try: + input_resolution = model_stat['clip_model'].visual.input_resolution + except: + input_resolution = 224 + + cuts = MakeCutoutsDango( + input_resolution, + Overview=cut_overview[1000 - t_int], + InnerCrop=cut_innercut[1000 - t_int], + IC_Size_Pow=args.cut_ic_pow, + IC_Grey_P=cut_icgray_p[1000 - t_int], + ) + clip_in = normalize(cuts(x_in.add(paddle.to_tensor(1.0)).divide(paddle.to_tensor(2.0)))) + image_embeds = (model_stat['clip_model'].encode_image(clip_in)) + + dists = spherical_dist_loss( + image_embeds.unsqueeze(1), + model_stat['target_embeds'].unsqueeze(0), + ) + + dists = dists.reshape([ + cut_overview[1000 - t_int] + cut_innercut[1000 - t_int], + n, + -1, + ]) + losses = dists.multiply(model_stat['weights']).sum(2).mean(0) + loss_values.append(losses.sum().item()) # log loss, probably shouldn't do per cutn_batch + + x_in_grad += (paddle.grad(losses.sum() * args.clip_guidance_scale, x_in)[0] / args.cutn_batches) + tv_losses = tv_loss(x_in) + range_losses = range_loss(x_in) + sat_losses = paddle.abs(x_in - x_in.clip(min=-1, max=1)).mean() + loss = (tv_losses.sum() * args.tv_scale + range_losses.sum() * args.range_scale + + sat_losses.sum() * args.sat_scale) + if init is not None and args.init_scale: + init_losses = lpips_model(x_in, init) + loss = loss + init_losses.sum() * args.init_scale + x_in_grad += paddle.grad(loss, x_in)[0] + if not 
paddle.isnan(x_in_grad).any(): + grad = -paddle.grad(x_in_d, x, x_in_grad)[0] + else: + x_is_NaN = True + grad = paddle.zeros_like(x) + if args.clamp_grad and not x_is_NaN: + magnitude = grad.square().mean().sqrt() + return (grad * magnitude.clip(max=args.clamp_max) / magnitude) + return grad + + if args.diffusion_sampling_mode == 'ddim': + sample_fn = diffusion.ddim_sample_loop_progressive + else: + sample_fn = diffusion.plms_sample_loop_progressive + + logger.info('creating artwork...') + + image_display = Output() + da_batches = DocumentArray() + + for _nb in range(args.n_batches): + display.clear_output(wait=True) + display.display(args.name_docarray, image_display) + gc.collect() + paddle.device.cuda.empty_cache() + + d = Document(tags=vars(args)) + da_batches.append(d) + + cur_t = diffusion.num_timesteps - skip_steps - 1 + + if args.perlin_init: + init = regen_perlin(args.perlin_mode, side_y, side_x, args.batch_size) + + if args.diffusion_sampling_mode == 'ddim': + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + eta=args.eta, + ) + else: + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + order=2, + ) + + threads = [] + for j, sample in enumerate(samples): + cur_t -= 1 + with image_display: + if j % args.display_rate == 0 or cur_t == -1: + for _, image in enumerate(sample['pred_xstart']): + image = (image + 1) / 2 + image = image.clip(0, 1).squeeze().transpose([1, 2, 0]).numpy() * 255 + image = np.uint8(image) + image = Image.fromarray(image) + + image.save(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb))) + c = Document(tags={'cur_t': cur_t}) + c.load_pil_image_to_datauri(image) + d.chunks.append(c) + display.clear_output(wait=True) + display.display(display.Image(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb)))) + d.chunks.plot_image_sprites(os.path.join(args.output_dir, + f'{args.name_docarray}-progress-{_nb}.png'), + show_index=True) + t = Thread( + target=_silent_push, + args=( + da_batches, + args.name_docarray, + ), + ) + threads.append(t) + t.start() + + if cur_t == -1: + d.load_pil_image_to_datauri(image) + + for t in threads: + t.join() + display.clear_output(wait=True) + logger.info(f'done! 
{args.name_docarray}') + da_batches.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + return da_batches + + +def _silent_push(da_batches: DocumentArray, name: str) -> None: + try: + da_batches.push(name) + except Exception as ex: + logger.debug(f'push failed: {ex}') From ffcde21305c61d950a9f93e57e6180c9a9665b87 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:19:56 +0800 Subject: [PATCH 030/117] add disco_diffusion_ernievil_base --- .../disco_diffusion_ernievil_base/README.md | 115 ++ .../disco_diffusion_ernievil_base/module.py | 437 +++++ .../requirements.txt | 8 + .../resize_right/README.md | 3 + .../resize_right/__init__.py | 0 .../resize_right/interp_methods.py | 70 + .../resize_right/resize_right.py | 403 +++++ .../reverse_diffusion/README.md | 2 + .../reverse_diffusion/__init__.py | 156 ++ .../reverse_diffusion/config.py | 77 + .../reverse_diffusion/helper.py | 138 ++ .../reverse_diffusion/model/__init__.py | 3 + .../model/gaussian_diffusion.py | 1214 +++++++++++++ .../reverse_diffusion/model/losses.py | 86 + .../reverse_diffusion/model/make_cutouts.py | 177 ++ .../reverse_diffusion/model/nn.py | 127 ++ .../reverse_diffusion/model/perlin_noises.py | 78 + .../reverse_diffusion/model/respace.py | 123 ++ .../reverse_diffusion/model/script_util.py | 201 +++ .../reverse_diffusion/model/sec_diff.py | 135 ++ .../reverse_diffusion/model/transforms.py | 757 ++++++++ .../reverse_diffusion/model/unet.py | 838 +++++++++ .../reverse_diffusion/resources/default.yml | 45 + .../resources/docstrings.yml | 103 ++ .../reverse_diffusion/runner.py | 285 +++ .../vit_b_16x/ernievil2/__init__.py | 18 + .../ernievil2/transformers/__init__.py | 0 .../vit_b_16x/ernievil2/transformers/beam.py | 1602 +++++++++++++++++ .../transformers/clip_vision_transformer.py | 412 +++++ .../ernievil2/transformers/droppath.py | 59 + .../ernievil2/transformers/efficientnet.py | 836 +++++++++ .../ernievil2/transformers/ernie2.py | 381 ++++ .../ernievil2/transformers/ernie_modeling.py | 739 ++++++++ .../ernievil2/transformers/ernie_tokenizer.py | 245 +++ .../ernievil2/transformers/file_utils.py | 63 + .../ernievil2/transformers/multimodal.py | 39 + .../transformers/paddle_vision_transformer.py | 444 +++++ .../ernievil2/transformers/resnet.py | 445 +++++ .../vit_b_16x/ernievil2/utils/__init__.py | 0 .../vit_b_16x/ernievil2/utils/tokenizer.py | 355 ++++ .../vit_b_16x/ernievil2/utils/utils.py | 75 + .../packages/configs/vit_ernie_base.yaml | 67 + .../ernie_base_3.0/ernie_config.base.json | 13 + 43 files changed, 11374 insertions(+) create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/module.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/requirements.txt create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/interp_methods.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/resize_right.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/__init__.py create mode 100755 
modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/config.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/helper.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/gaussian_diffusion.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/losses.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/make_cutouts.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/nn.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/perlin_noises.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/respace.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/script_util.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/sec_diff.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/transforms.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/unet.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/default.yml create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/docstrings.yml create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/runner.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/beam.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/clip_vision_transformer.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/droppath.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/efficientnet.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie2.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_modeling.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/multimodal.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/paddle_vision_transformer.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/resnet.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/__init__.py create mode 100755 
modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/configs/vit_ernie_base.yaml create mode 100755 modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/ernie_base_3.0/ernie_config.base.json diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md b/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md new file mode 100755 index 000000000..182bb6ec2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md @@ -0,0 +1,115 @@ +# disco_diffusion_ernievil_base + +|模型名称|disco_diffusion_ernievil_base| +| :--- | :---: | +|类别|图像-文图生成| +|网络|dd+ERNIE-ViL| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|2.9GB| +|最新更新日期|2022-08-02| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "小桥流水人家" + + - 输出图像 +

+
+  - 生成过程
+
+ + +### 模型介绍 + +disco_diffusion_ernievil_base 是一个文图生成模型,可以通过输入一段文字来生成符合该句子语义的图像。该模型由两部分组成,一部分是扩散模型,是一种生成模型,可以从噪声输入中重建出原始图像。另一部分是多模态预训练模型(ERNIE-ViL), 可以将文本和图像表示在同一个特征空间,相近语义的文本和图像在该特征空间里距离会更相近。在该文图生成模型中,扩散模型负责从初始噪声或者指定初始图像中来生成目标图像,ERNIE-ViL负责引导生成图像的语义和输入的文本的语义尽可能接近,随着扩散模型在ERNIE-ViL的引导下不断的迭代生成新图像,最终能够生成文本所描述内容的图像。该模块中使用的模型为ERNIE-ViL,由ERNIE 3.0+ViT构成。 + +更多详情请参考论文:[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install disco_diffusion_ernievil_base + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run disco_diffusion_ernievil_base --text_prompts "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作。" --output_dir disco_diffusion_ernievil_base_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_ernievil_base") + text_prompts = ["孤舟蓑笠翁,独钓寒江雪。"] + # 生成图像, 默认会在disco_diffusion_ernievil_base_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + da = module.generate_image(text_prompts=text_prompts, artist='齐白石', output_dir='./disco_diffusion_ernievil_base_out/') + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_ernievil_base_out-result.png') + # 展示所有的中间结果 + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_ernievil_base_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作"。 + - style(Optional[str]): 指定绘画的风格,如水墨画、油画、水彩画等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如齐白石、Greg Rutkowsk,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"disco_diffusion_ernievil_base_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install disco_diffusion_ernievil_base == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py new file mode 100755 index 000000000..a4159ee0f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py @@ -0,0 +1,437 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import ast +import os +import sys +from functools import partial +from typing import List +from typing import Optional + +import paddle +from disco_diffusion_ernievil_base import resize_right +from disco_diffusion_ernievil_base.reverse_diffusion import create +from disco_diffusion_ernievil_base.vit_b_16x import ernievil2 + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="disco_diffusion_ernievil_base", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class DiscoDiffusionClip: + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 0, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 0, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 1, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + use_gpu: Optional[bool] = True, + output_dir: Optional[str] = 'disco_diffusion_ernievil_base_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. 
If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. 
Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. 
However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. 
eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param use_gpu: whether to use gpu or not. + :return: a DocumentArray object that has `n_batches` Documents + """ + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",由{}所作".format(artist) + elif isinstance(text_prompts, list): + text_prompts[0] = text_prompts[0].rstrip(',.,。') + if style is not None: + text_prompts[0] += ",{}".format(style) + if artist is not None: + text_prompts[0] += ",由{}所作".format(artist) + + return create(text_prompts=text_prompts, + init_image=init_image, + width_height=width_height, + skip_steps=skip_steps, + steps=steps, + cut_ic_pow=cut_ic_pow, + init_scale=init_scale, + clip_guidance_scale=clip_guidance_scale, + tv_scale=tv_scale, + range_scale=range_scale, + sat_scale=sat_scale, + cutn_batches=cutn_batches, + diffusion_sampling_mode=diffusion_sampling_mode, + perlin_init=perlin_init, + perlin_mode=perlin_mode, + seed=seed, + eta=eta, + clamp_grad=clamp_grad, + clamp_max=clamp_max, + randomize_class=randomize_class, + clip_denoised=clip_denoised, + fuzzy_prompt=fuzzy_prompt, + rand_mag=rand_mag, + cut_overview=cut_overview, + cut_innercut=cut_innercut, + cut_icgray_p=cut_icgray_p, + display_rate=display_rate, + n_batches=n_batches, + batch_size=batch_size, + batch_name=batch_name, + clip_models=['vit_b_16x'], + output_dir=output_dir) + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = [] + for text_prompt in text_prompts: + result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() + results.append(result) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + init_image=args.init_image, + width_height=args.width_height, + skip_steps=args.skip_steps, + steps=args.steps, + cut_ic_pow=args.cut_ic_pow, + init_scale=args.init_scale, + clip_guidance_scale=args.clip_guidance_scale, + tv_scale=args.tv_scale, + range_scale=args.range_scale, + sat_scale=args.sat_scale, + cutn_batches=args.cutn_batches, + diffusion_sampling_mode=args.diffusion_sampling_mode, + perlin_init=args.perlin_init, + perlin_mode=args.perlin_mode, + seed=args.seed, + eta=args.eta, + clamp_grad=args.clamp_grad, + clamp_max=args.clamp_max, + randomize_class=args.randomize_class, + clip_denoised=args.clip_denoised, + fuzzy_prompt=args.fuzzy_prompt, + rand_mag=args.rand_mag, + cut_overview=args.cut_overview, + cut_innercut=args.cut_innercut, + cut_icgray_p=args.cut_icgray_p, + display_rate=args.display_rate, + n_batches=args.n_batches, + batch_size=args.batch_size, + batch_name=args.batch_name, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_input_group.add_argument( + '--skip_steps', + type=int, + default=0, + help= + 'Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15%% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50%% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture' + ) + self.arg_input_group.add_argument( + '--steps', + type=int, + default=250, + help= + "When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time." + ) + self.arg_input_group.add_argument( + '--cut_ic_pow', + type=int, + default=1, + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--init_scale', + type=int, + default=1000, + help= + "This controls how strongly CLIP will try to match the init_image provided. 
This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost." + ) + self.arg_input_group.add_argument( + '--clip_guidance_scale', + type=int, + default=5000, + help= + "CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well." + ) + self.arg_input_group.add_argument( + '--tv_scale', + type=int, + default=0, + help= + "Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising" + ) + self.arg_input_group.add_argument( + '--range_scale', + type=int, + default=0, + help= + "Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images." + ) + self.arg_input_group.add_argument( + '--sat_scale', + type=int, + default=0, + help= + "Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation." + ) + self.arg_input_group.add_argument( + '--cutn_batches', + type=int, + default=4, + help= + "Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. 
DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below." + ) + self.arg_input_group.add_argument( + '--diffusion_sampling_mode', + type=str, + default='ddim', + help= + "Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord." + ) + self.arg_input_group.add_argument( + '--perlin_init', + type=bool, + default=False, + help= + "Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively." + ) + self.arg_input_group.add_argument( + '--perlin_mode', + type=str, + default='mixed', + help= + "sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects." + ) + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical." + ) + self.arg_input_group.add_argument( + '--eta', + type=float, + default=0.8, + help= + "eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects." 
+ ) + self.arg_input_group.add_argument( + '--clamp_grad', + type=bool, + default=True, + help= + "As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced." + ) + self.arg_input_group.add_argument( + '--clamp_max', + type=float, + default=0.05, + help= + "Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy." + ) + self.arg_input_group.add_argument('--randomize_class', type=bool, default=True, help="Random class.") + self.arg_input_group.add_argument('--clip_denoised', type=bool, default=False, help="Clip denoised.") + self.arg_input_group.add_argument( + '--fuzzy_prompt', + type=bool, + default=False, + help= + "Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this." + ) + self.arg_input_group.add_argument( + '--rand_mag', + type=float, + default=0.5, + help="Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.") + self.arg_input_group.add_argument('--cut_overview', + type=str, + default='[12]*400+[4]*600', + help="The schedule of overview cuts") + self.arg_input_group.add_argument('--cut_innercut', + type=str, + default='[4]*400+[12]*600', + help="The schedule of inner cuts") + self.arg_input_group.add_argument( + '--cut_icgray_p', + type=str, + default='[0.2]*400+[0]*600', + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help= + "During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly." + ) + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='disco_diffusion_ernievil_base_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--text_prompts', type=str) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument( + '--init_image', + type=str, + default=None, + help= + "Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion." + ) + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[1280, 768], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--n_batches', + type=int, + default=1, + help= + "This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings." + ) + self.arg_input_group.add_argument('--batch_size', type=int, default=1, help="Batch size.") + self.arg_input_group.add_argument( + '--batch_name', + type=str, + default='', + help= + 'The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name.' + ) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/requirements.txt b/modules/image/text_to_image/disco_diffusion_ernievil_base/requirements.txt new file mode 100755 index 000000000..8b4bc0ea4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/requirements.txt @@ -0,0 +1,8 @@ +numpy +paddle_lpips==0.1.2 +ftfy +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/README.md b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/README.md new file mode 100755 index 000000000..1f8d0bb0a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/README.md @@ -0,0 +1,3 @@ +# ResizeRight (Paddle) +Fully differentiable resize function implemented by Paddle. +This module is based on [assafshocher/ResizeRight](https://github.com/assafshocher/ResizeRight). 
diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/interp_methods.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/interp_methods.py new file mode 100755 index 000000000..276eb055a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/interp_methods.py @@ -0,0 +1,70 @@ +from math import pi + +try: + import paddle +except ImportError: + paddle = None + +try: + import numpy + import numpy as np +except ImportError: + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or Paddle, but neither was found") + + +def set_framework_dependencies(x): + if type(x) is numpy.ndarray: + to_dtype = lambda a: a + fw = numpy + else: + to_dtype = lambda a: paddle.cast(a, x.dtype) + fw = paddle + # eps = fw.finfo(fw.float32).eps + eps = paddle.to_tensor(np.finfo(np.float32).eps) + return fw, to_dtype, eps + + +def support_sz(sz): + + def wrapper(f): + f.support_sz = sz + return f + + return wrapper + + +@support_sz(4) +def cubic(x): + fw, to_dtype, eps = set_framework_dependencies(x) + absx = fw.abs(x) + absx2 = absx**2 + absx3 = absx**3 + return ((1.5 * absx3 - 2.5 * absx2 + 1.) * to_dtype(absx <= 1.) + + (-0.5 * absx3 + 2.5 * absx2 - 4. * absx + 2.) * to_dtype((1. < absx) & (absx <= 2.))) + + +@support_sz(4) +def lanczos2(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 2) + eps) / ((pi**2 * x**2 / 2) + eps)) * to_dtype(abs(x) < 2)) + + +@support_sz(6) +def lanczos3(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 3) + eps) / ((pi**2 * x**2 / 3) + eps)) * to_dtype(abs(x) < 3)) + + +@support_sz(2) +def linear(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return ((x + 1) * to_dtype((-1 <= x) & (x < 0)) + (1 - x) * to_dtype((0 <= x) & (x <= 1))) + + +@support_sz(1) +def box(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return to_dtype((-1 <= x) & (x < 0)) + to_dtype((0 <= x) & (x <= 1)) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/resize_right.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/resize_right.py new file mode 100755 index 000000000..b63c61718 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/resize_right/resize_right.py @@ -0,0 +1,403 @@ +import warnings +from fractions import Fraction +from math import ceil +from typing import Tuple + +import disco_diffusion_ernievil_base.resize_right.interp_methods as interp_methods + + +class NoneClass: + pass + + +try: + import paddle + from paddle import nn + nnModuleWrapped = nn.Layer +except ImportError: + warnings.warn('No Paddle found, will work only with Numpy') + paddle = None + nnModuleWrapped = NoneClass + +try: + import numpy + import numpy as np +except ImportError: + warnings.warn('No Numpy found, will work only with Paddle') + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or Paddle, but neither was found") + + +def resize(input, + scale_factors=None, + out_shape=None, + interp_method=interp_methods.cubic, + support_sz=None, + antialiasing=True, + by_convs=False, + scale_tolerance=None, + max_numerator=10, +
pad_mode='constant'): + # get properties of the input tensor + in_shape, n_dims = input.shape, input.ndim + + # fw stands for framework that can be either numpy or paddle, + # determined by the input type + fw = numpy if type(input) is numpy.ndarray else paddle + eps = np.finfo(np.float32).eps if fw == numpy else paddle.to_tensor(np.finfo(np.float32).eps) + device = input.place if fw is paddle else None + + # set missing scale factors or output shape, one according to another, + # scream if both missing. this is also where all the default policies + # take place. also handling the by_convs attribute carefully. + scale_factors, out_shape, by_convs = set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, + scale_tolerance, max_numerator, eps, fw) + + # sort indices of dimensions according to scale of each dimension. + # since we are going dim by dim this is efficient + sorted_filtered_dims_and_scales = [(dim, scale_factors[dim], by_convs[dim], in_shape[dim], out_shape[dim]) + for dim in sorted(range(n_dims), key=lambda ind: scale_factors[ind]) + if scale_factors[dim] != 1.] + # unless support size is specified by the user, it is an attribute + # of the interpolation method + if support_sz is None: + support_sz = interp_method.support_sz + + # output begins identical to input and changes with each iteration + output = input + + # iterate over dims + for (dim, scale_factor, dim_by_convs, in_sz, out_sz) in sorted_filtered_dims_and_scales: + # STEP 1- PROJECTED GRID: The non-integer locations of the projection + # of output pixel locations to the input tensor + projected_grid = get_projected_grid(in_sz, out_sz, scale_factor, fw, dim_by_convs, device) + + # STEP 1.5: ANTIALIASING- If antialiasing is taking place, we modify + # the window size and the interpolation method (see inside function) + cur_interp_method, cur_support_sz = apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, + antialiasing) + + # STEP 2- FIELDS OF VIEW: for each output pixel, map the input pixels + # that influence it. Also calculate needed padding and update grid + # accordingly + field_of_view = get_field_of_view(projected_grid, cur_support_sz, fw, eps, device) + + # STEP 2.5- CALCULATE PAD AND UPDATE: according to the field of view, + # the input should be padded to handle the boundaries, coordinates + # should be updated. actual padding only occurs when weights are + # applied (step 4). if using by_convs for this dim, then we need to + # calc right and left boundaries for each filter instead. + pad_sz, projected_grid, field_of_view = calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, + dim_by_convs, fw, device) + # STEP 3- CALCULATE WEIGHTS: Match a set of weights to the pixels in + # the field of view for each output pixel + weights = get_weights(cur_interp_method, projected_grid, field_of_view) + + # STEP 4- APPLY WEIGHTS: Each output pixel is calculated by multiplying + # its set of weights with the pixel values in its field of view. + # We now multiply the fields of view with their matching weights. + # We do this by tensor multiplication and broadcasting. + # if by_convs is true for this dim, then we do this action by + # convolutions. this is equivalent but faster.
+ if not dim_by_convs: + output = apply_weights(output, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw) + else: + output = apply_convs(output, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw) + return output + + +def get_projected_grid(in_sz, out_sz, scale_factor, fw, by_convs, device=None): + # we start by having the ouput coordinates which are just integer locations + # in the special case when usin by_convs, we only need two cycles of grid + # points. the first and last. + grid_sz = out_sz if not by_convs else scale_factor.numerator + out_coordinates = fw_arange(grid_sz, fw, device) + + # This is projecting the ouput pixel locations in 1d to the input tensor, + # as non-integer locations. + # the following fomrula is derived in the paper + # "From Discrete to Continuous Convolutions" by Shocher et al. + return (out_coordinates / float(scale_factor) + (in_sz - 1) / 2 - (out_sz - 1) / (2 * float(scale_factor))) + + +def get_field_of_view(projected_grid, cur_support_sz, fw, eps, device): + # for each output pixel, map which input pixels influence it, in 1d. + # we start by calculating the leftmost neighbor, using half of the window + # size (eps is for when boundary is exact int) + left_boundaries = fw_ceil(projected_grid - cur_support_sz / 2 - eps, fw) + + # then we simply take all the pixel centers in the field by counting + # window size pixels from the left boundary + ordinal_numbers = fw_arange(ceil(cur_support_sz - eps), fw, device) + return left_boundaries[:, None] + ordinal_numbers + + +def calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, dim_by_convs, fw, device): + if not dim_by_convs: + # determine padding according to neighbor coords out of bound. + # this is a generalized notion of padding, when pad<0 it means crop + pad_sz = [-field_of_view[0, 0].item(), field_of_view[-1, -1].item() - in_sz + 1] + + # since input image will be changed by padding, coordinates of both + # field_of_view and projected_grid need to be updated + field_of_view += pad_sz[0] + projected_grid += pad_sz[0] + + else: + # only used for by_convs, to calc the boundaries of each filter the + # number of distinct convolutions is the numerator of the scale factor + num_convs, stride = scale_factor.numerator, scale_factor.denominator + + # calculate left and right boundaries for each conv. left can also be + # negative right can be bigger than in_sz. such cases imply padding if + # needed. however if# both are in-bounds, it means we need to crop, + # practically apply the conv only on part of the image. + left_pads = -field_of_view[:, 0] + + # next calc is tricky, explanation by rows: + # 1) counting output pixels between the first position of each filter + # to the right boundary of the input + # 2) dividing it by number of filters to count how many 'jumps' + # each filter does + # 3) multiplying by the stride gives us the distance over the input + # coords done by all these jumps for each filter + # 4) to this distance we add the right boundary of the filter when + # placed in its leftmost position. so now we get the right boundary + # of that filter in input coord. + # 5) the padding size needed is obtained by subtracting the rightmost + # input coordinate. if the result is positive padding is needed. if + # negative then negative padding means shaving off pixel columns. 
+ right_pads = (((out_sz - fw_arange(num_convs, fw, device) - 1) # (1) + // num_convs) # (2) + * stride # (3) + + field_of_view[:, -1] # (4) + - in_sz + 1) # (5) + + # in the by_convs case pad_sz is a list of left-right pairs. one per + # each filter + + pad_sz = list(zip(left_pads, right_pads)) + + return pad_sz, projected_grid, field_of_view + + +def get_weights(interp_method, projected_grid, field_of_view): + # the set of weights per each output pixels is the result of the chosen + # interpolation method applied to the distances between projected grid + # locations and the pixel-centers in the field of view (distances are + # directed, can be positive or negative) + weights = interp_method(projected_grid[:, None] - field_of_view) + + # we now carefully normalize the weights to sum to 1 per each output pixel + sum_weights = weights.sum(1, keepdim=True) + sum_weights[sum_weights == 0] = 1 + return weights / sum_weights + + +def apply_weights(input, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw): + # for this operation we assume the resized dim is the first one. + # so we transpose and will transpose back after multiplying + tmp_input = fw_swapaxes(input, dim, 0, fw) + + # apply padding + tmp_input = fw_pad(tmp_input, fw, pad_sz, pad_mode) + + # field_of_view is a tensor of order 2: for each output (1d location + # along cur dim)- a list of 1d neighbors locations. + # note that this whole operations is applied to each dim separately, + # this is why it is all in 1d. + # neighbors = tmp_input[field_of_view] is a tensor of order image_dims+1: + # for each output pixel (this time indicated in all dims), these are the + # values of the neighbors in the 1d field of view. note that we only + # consider neighbors along the current dim, but such set exists for every + # multi-dim location, hence the final tensor order is image_dims+1. + paddle.device.cuda.empty_cache() + neighbors = tmp_input[field_of_view] + + # weights is an order 2 tensor: for each output location along 1d- a list + # of weights matching the field of view. we augment it with ones, for + # broadcasting, so that when multiplies some tensor the weights affect + # only its first dim. + tmp_weights = fw.reshape(weights, (*weights.shape, *[1] * (n_dims - 1))) + + # now we simply multiply the weights with the neighbors, and then sum + # along the field of view, to get a single value per out pixel + tmp_output = (neighbors * tmp_weights).sum(1) + # we transpose back the resized dim to its original position + return fw_swapaxes(tmp_output, 0, dim, fw) + + +def apply_convs(input, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw): + # for this operations we assume the resized dim is the last one. + # so we transpose and will transpose back after multiplying + input = fw_swapaxes(input, dim, -1, fw) + + # the stride for all convs is the denominator of the scale factor + stride, num_convs = scale_factor.denominator, scale_factor.numerator + + # prepare an empty tensor for the output + tmp_out_shape = list(input.shape) + tmp_out_shape[-1] = out_sz + tmp_output = fw_empty(tuple(tmp_out_shape), fw, input.device) + + # iterate over the conv operations. we have as many as the numerator + # of the scale-factor. for each we need boundaries and a filter. + for conv_ind, (pad_sz, filt) in enumerate(zip(pad_sz, weights)): + # apply padding (we pad last dim, padding can be negative) + pad_dim = input.ndim - 1 + tmp_input = fw_pad(input, fw, pad_sz, pad_mode, dim=pad_dim) + + # apply convolution over last dim. 
store in the output tensor with + # positional strides so that when the loop is comlete conv results are + # interwind + tmp_output[..., conv_ind::num_convs] = fw_conv(tmp_input, filt, stride) + + return fw_swapaxes(tmp_output, -1, dim, fw) + + +def set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, scale_tolerance, max_numerator, eps, fw): + # eventually we must have both scale-factors and out-sizes for all in/out + # dims. however, we support many possible partial arguments + if scale_factors is None and out_shape is None: + raise ValueError("either scale_factors or out_shape should be " + "provided") + if out_shape is not None: + # if out_shape has less dims than in_shape, we defaultly resize the + # first dims for numpy and last dims for paddle + out_shape = (list(out_shape) + + list(in_shape[len(out_shape):]) if fw is numpy else list(in_shape[:-len(out_shape)]) + + list(out_shape)) + if scale_factors is None: + # if no scale given, we calculate it as the out to in ratio + # (not recomended) + scale_factors = [out_sz / in_sz for out_sz, in_sz in zip(out_shape, in_shape)] + if scale_factors is not None: + # by default, if a single number is given as scale, we assume resizing + # two dims (most common are images with 2 spatial dims) + scale_factors = (scale_factors if isinstance(scale_factors, (list, tuple)) else [scale_factors, scale_factors]) + # if less scale_factors than in_shape dims, we defaultly resize the + # first dims for numpy and last dims for paddle + scale_factors = (list(scale_factors) + [1] * (len(in_shape) - len(scale_factors)) if fw is numpy else [1] * + (len(in_shape) - len(scale_factors)) + list(scale_factors)) + if out_shape is None: + # when no out_shape given, it is calculated by multiplying the + # scale by the in_shape (not recomended) + out_shape = [ceil(scale_factor * in_sz) for scale_factor, in_sz in zip(scale_factors, in_shape)] + # next part intentionally after out_shape determined for stability + # we fix by_convs to be a list of truth values in case it is not + if not isinstance(by_convs, (list, tuple)): + by_convs = [by_convs] * len(out_shape) + + # next loop fixes the scale for each dim to be either frac or float. + # this is determined by by_convs and by tolerance for scale accuracy. + for ind, (sf, dim_by_convs) in enumerate(zip(scale_factors, by_convs)): + # first we fractionaize + if dim_by_convs: + frac = Fraction(1 / sf).limit_denominator(max_numerator) + frac = Fraction(numerator=frac.denominator, denominator=frac.numerator) + + # if accuracy is within tolerance scale will be frac. if not, then + # it will be float and the by_convs attr will be set false for + # this dim + if scale_tolerance is None: + scale_tolerance = eps + if dim_by_convs and abs(frac - sf) < scale_tolerance: + scale_factors[ind] = frac + else: + scale_factors[ind] = float(sf) + by_convs[ind] = False + + return scale_factors, out_shape, by_convs + + +def apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, antialiasing): + # antialiasing is "stretching" the field of view according to the scale + # factor (only for downscaling). this is low-pass filtering. this + # requires modifying both the interpolation (stretching the 1d + # function and multiplying by the scale-factor) and the window size. 
+ scale_factor = float(scale_factor) + if scale_factor >= 1.0 or not antialiasing: + return interp_method, support_sz + cur_interp_method = (lambda arg: scale_factor * interp_method(scale_factor * arg)) + cur_support_sz = support_sz / scale_factor + return cur_interp_method, cur_support_sz + + +def fw_ceil(x, fw): + if fw is numpy: + return fw.int_(fw.ceil(x)) + else: + return paddle.cast(x.ceil(), dtype='int64') + + +def fw_floor(x, fw): + if fw is numpy: + return fw.int_(fw.floor(x)) + else: + return paddle.cast(x.floor(), dtype='int64') + + +def fw_cat(x, fw): + if fw is numpy: + return fw.concatenate(x) + else: + return fw.concat(x) + + +def fw_swapaxes(x, ax_1, ax_2, fw): + if fw is numpy: + return fw.swapaxes(x, ax_1, ax_2) + else: + if ax_1 == -1: + ax_1 = len(x.shape) - 1 + if ax_2 == -1: + ax_2 = len(x.shape) - 1 + perm0 = list(range(len(x.shape))) + temp = ax_1 + perm0[temp] = ax_2 + perm0[ax_2] = temp + return fw.transpose(x, perm0) + + +def fw_pad(x, fw, pad_sz, pad_mode, dim=0): + if pad_sz == (0, 0): + return x + if fw is numpy: + pad_vec = [(0, 0)] * x.ndim + pad_vec[dim] = pad_sz + return fw.pad(x, pad_width=pad_vec, mode=pad_mode) + else: + if x.ndim < 3: + x = x[None, None, ...] + + pad_vec = [0] * ((x.ndim - 2) * 2) + pad_vec[0:2] = pad_sz + return fw_swapaxes(fw.nn.functional.pad(fw_swapaxes(x, dim, -1, fw), pad=pad_vec, mode=pad_mode), dim, -1, fw) + + +def fw_conv(input, filter, stride): + # we want to apply 1d conv to any nd array. the way to do it is to reshape + # the input to a 4D tensor. first two dims are singeletons, 3rd dim stores + # all the spatial dims that we are not convolving along now. then we can + # apply conv2d with a 1xK filter. This convolves the same way all the other + # dims stored in the 3d dim. like depthwise conv over these. + # TODO: numpy support + reshaped_input = input.reshape(1, 1, -1, input.shape[-1]) + reshaped_output = paddle.nn.functional.conv2d(reshaped_input, filter.view(1, 1, 1, -1), stride=(1, stride)) + return reshaped_output.reshape(*input.shape[:-1], -1) + + +def fw_arange(upper_bound, fw, device): + if fw is numpy: + return fw.arange(upper_bound) + else: + return fw.arange(upper_bound) + + +def fw_empty(shape, fw, device): + if fw is numpy: + return fw.empty(shape) + else: + return fw.empty(shape=shape) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/README.md b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/README.md new file mode 100755 index 000000000..711671bad --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/README.md @@ -0,0 +1,2 @@ +# Diffusion model (Paddle) +This module implements diffusion model which accepts a text prompt and outputs images semantically close to the text. The code is rewritten by Paddle, and mainly refer to two projects: jina-ai/discoart[https://github.com/jina-ai/discoart] and openai/guided-diffusion[https://github.com/openai/guided-diffusion]. Thanks for their wonderful work. 
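The `create` function documented in reverse_diffusion/__init__.py below is the programmatic entry point wrapped by the module's `generate_image`. A minimal sketch of calling it directly, assuming the package is importable under the directory name used in these paths; the argument names, defaults, and example prompt are taken from the overload signature that follows, and the last line is a hypothetical follow-up using docarray's `Document.save_uri_to_file`:

    from disco_diffusion_ernievil_base.reverse_diffusion import create

    # returns a DocumentArray object that has `n_batches` Documents
    da = create(text_prompts=[
                    'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.',
                    'yellow color scheme'
                ],
                width_height=[1280, 768],
                steps=250,
                n_batches=1,
                output_dir='discoart_output')
    da[0].save_uri_to_file('lighthouse.png')  # export the first generated image (assumes docarray's Document API)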
diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/__init__.py new file mode 100755 index 000000000..39fc908dc --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/__init__.py @@ -0,0 +1,156 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/__init__.py +''' +import os +import warnings + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + +__all__ = ['create'] + +import sys + +__resources_path__ = os.path.join( + os.path.dirname(sys.modules.get(__package__).__file__ if __package__ in sys.modules else __file__), + 'resources', +) + +import gc + +# check if GPU is available +import paddle + +# download and load models, this will take some time on the first load + +from .helper import load_all_models, load_diffusion_model, load_clip_models + +model_config, secondary_model = load_all_models('512x512_diffusion_uncond_finetune_008100', use_secondary_model=True) + +from typing import TYPE_CHECKING, overload, List, Optional + +if TYPE_CHECKING: + from docarray import DocumentArray, Document + +_clip_models_cache = {} + +# begin_create_overload + + +@overload +def create(text_prompts: Optional[List[str]] = [ + 'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.', + 'yellow color scheme', +], + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 10, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 150, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_model: Optional[str] = '512x512_diffusion_uncond_finetune_008100', + use_secondary_model: Optional[bool] = True, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 4, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + clip_models: Optional[list] = ['ViTB32', 'ViTB16', 'RN50'], + output_dir: Optional[str] = 'discoart_output') -> 'DocumentArray': + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. 
Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. 
Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_model: Diffusion_model of choice. + :param use_secondary_model: Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. 
This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param clip_models: CLIP Model selectors. ViTB32, ViTB16, ViTL14, RN101, RN50, RN50x4, RN50x16, RN50x64.These various CLIP models are available for you to use during image generation. 
Models have different styles or ‘flavors,’ so look around. You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.The rough order of speed/mem usage is (smallest/fastest to largest/slowest):VitB32RN50RN101VitB16RN50x4RN50x16RN50x64ViTL14For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +# end_create_overload + + +@overload +def create(init_document: 'Document') -> 'DocumentArray': + """ + Create an artwork using a DocArray ``Document`` object as initial state. + :param init_document: its ``.tags`` will be used as parameters, ``.uri`` (if present) will be used as init image. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +def create(**kwargs) -> 'DocumentArray': + from .config import load_config + from .runner import do_run + + if 'init_document' in kwargs: + d = kwargs['init_document'] + _kwargs = d.tags + if not _kwargs: + warnings.warn('init_document has no .tags, fallback to default config') + if d.uri: + _kwargs['init_image'] = kwargs['init_document'].uri + else: + warnings.warn('init_document has no .uri, fallback to no init image') + kwargs.pop('init_document') + if kwargs: + warnings.warn('init_document has .tags and .uri, but kwargs are also present, will override .tags') + _kwargs.update(kwargs) + _args = load_config(user_config=_kwargs) + else: + _args = load_config(user_config=kwargs) + + model, diffusion = load_diffusion_model(model_config, _args.diffusion_model, steps=_args.steps) + + clip_models = load_clip_models(enabled=_args.clip_models, clip_models=_clip_models_cache) + + gc.collect() + paddle.device.cuda.empty_cache() + try: + return do_run(_args, (model, diffusion, clip_models, secondary_model)) + except KeyboardInterrupt: + pass diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/config.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/config.py new file mode 100755 index 000000000..0cbc71e6f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/config.py @@ -0,0 +1,77 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/config.py +''' +import copy +import random +import warnings +from types import SimpleNamespace +from typing import Dict + +import yaml +from yaml import Loader + +from . 
import __resources_path__ + +with open(f'{__resources_path__}/default.yml') as ymlfile: + default_args = yaml.load(ymlfile, Loader=Loader) + + +def load_config(user_config: Dict, ): + cfg = copy.deepcopy(default_args) + + if user_config: + cfg.update(**user_config) + + for k in user_config.keys(): + if k not in cfg: + warnings.warn(f'unknown argument {k}, ignored') + + for k, v in cfg.items(): + if k in ('batch_size', 'display_rate', 'seed', 'skip_steps', 'steps', 'n_batches', + 'cutn_batches') and isinstance(v, float): + cfg[k] = int(v) + if k == 'width_height': + cfg[k] = [int(vv) for vv in v] + + cfg.update(**{ + 'seed': cfg['seed'] or random.randint(0, 2**32), + }) + + if cfg['batch_name']: + da_name = f'{__package__}-{cfg["batch_name"]}-{cfg["seed"]}' + else: + da_name = f'{__package__}-{cfg["seed"]}' + warnings.warn('you did not set `batch_name`, set it to have unique session ID') + + cfg.update(**{'name_docarray': da_name}) + + print_args_table(cfg) + + return SimpleNamespace(**cfg) + + +def print_args_table(cfg): + from rich.table import Table + from rich import box + from rich.console import Console + + console = Console() + + param_str = Table( + title=cfg['name_docarray'], + box=box.ROUNDED, + highlight=True, + title_justify='left', + ) + param_str.add_column('Argument', justify='right') + param_str.add_column('Value', justify='left') + + for k, v in sorted(cfg.items()): + value = str(v) + + if not default_args.get(k, None) == v: + value = f'[b]{value}[/]' + + param_str.add_row(k, value) + + console.print(param_str) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/helper.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/helper.py new file mode 100755 index 000000000..ef72e68bf --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/helper.py @@ -0,0 +1,138 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. 
+https://github.com/jina-ai/discoart/blob/main/discoart/helper.py
+'''
+import hashlib
+import logging
+import os
+import subprocess
+import sys
+from os.path import expanduser
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from typing import List
+
+import paddle
+
+
+def _get_logger():
+    logger = logging.getLogger(__package__)
+    _log_level = os.environ.get('DISCOART_LOG_LEVEL', 'INFO')
+    logger.setLevel(_log_level)
+    ch = logging.StreamHandler()
+    ch.setLevel(_log_level)
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+    return logger
+
+
+logger = _get_logger()
+
+
+def load_clip_models(enabled: List[str], clip_models: Dict[str, Any] = {}):
+
+    import disco_diffusion_ernievil_base.vit_b_16x.ernievil2 as ernievil2
+    from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.utils.utils import build_model
+
+    # load enabled models
+    for k in enabled:
+        if k not in clip_models:
+            clip_models[k] = build_model(name=k)
+            clip_models[k].eval()
+            for parameter in clip_models[k].parameters():
+                parameter.stop_gradient = True
+
+    # drop models that are no longer enabled to save memory
+    # (iterate over a copy of the keys so the dict can be mutated safely)
+    for k in list(clip_models.keys()):
+        if k not in enabled:
+            clip_models.pop(k)
+
+    return list(clip_models.values())
+
+
+def load_all_models(diffusion_model, use_secondary_model):
+    from .model.script_util import (
+        model_and_diffusion_defaults, )
+
+    model_config = model_and_diffusion_defaults()
+
+    if diffusion_model == '512x512_diffusion_uncond_finetune_008100':
+        model_config.update({
+            'attention_resolutions': '32, 16, 8',
+            'class_cond': False,
+            'diffusion_steps': 1000,  # No need to edit this, it is taken care of later.
+            'rescale_timesteps': True,
+            'timestep_respacing': 250,  # No need to edit this, it is taken care of later.
+            'image_size': 512,
+            'learn_sigma': True,
+            'noise_schedule': 'linear',
+            'num_channels': 256,
+            'num_head_channels': 64,
+            'num_res_blocks': 2,
+            'resblock_updown': True,
+            'use_fp16': False,
+            'use_scale_shift_norm': True,
+        })
+    elif diffusion_model == '256x256_diffusion_uncond':
+        model_config.update({
+            'attention_resolutions': '32, 16, 8',
+            'class_cond': False,
+            'diffusion_steps': 1000,  # No need to edit this, it is taken care of later.
+            'rescale_timesteps': True,
+            'timestep_respacing': 250,  # No need to edit this, it is taken care of later.
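+            # The remaining keys describe the network architecture and need to match the
+            # pretrained 256x256 unconditional checkpoint; only diffusion_steps and
+            # timestep_respacing are overwritten later by load_diffusion_model().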
+ 'image_size': 256, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + + secondary_model = None + if use_secondary_model: + from .model.sec_diff import SecondaryDiffusionImageNet2 + secondary_model = SecondaryDiffusionImageNet2() + model_dict = paddle.load( + os.path.join(os.path.dirname(__file__), 'pre_trained', 'secondary_model_imagenet_2.pdparams')) + secondary_model.set_state_dict(model_dict) + secondary_model.eval() + for parameter in secondary_model.parameters(): + parameter.stop_gradient = True + + return model_config, secondary_model + + +def load_diffusion_model(model_config, diffusion_model, steps): + from .model.script_util import ( + create_model_and_diffusion, ) + + timestep_respacing = f'ddim{steps}' + diffusion_steps = (1000 // steps) * steps if steps < 1000 else steps + model_config.update({ + 'timestep_respacing': timestep_respacing, + 'diffusion_steps': diffusion_steps, + }) + + model, diffusion = create_model_and_diffusion(**model_config) + model.set_state_dict( + paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', f'{diffusion_model}.pdparams'))) + model.eval() + for name, param in model.named_parameters(): + param.stop_gradient = True + + return model, diffusion + + +def parse_prompt(prompt): + if prompt.startswith('http://') or prompt.startswith('https://'): + vals = prompt.rsplit(':', 2) + vals = [vals[0] + ':' + vals[1], *vals[2:]] + else: + vals = prompt.rsplit(':', 1) + vals = vals + ['', '1'][len(vals):] + return vals[0], float(vals[1]) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/__init__.py new file mode 100755 index 000000000..466800666 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/__init__.py @@ -0,0 +1,3 @@ +""" +Codebase for "Improved Denoising Diffusion Probabilistic Models" implemented by Paddle. +""" diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/gaussian_diffusion.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/gaussian_diffusion.py new file mode 100755 index 000000000..86cd2c650 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/gaussian_diffusion.py @@ -0,0 +1,1214 @@ +""" +Diffusion model implemented by Paddle. +This code is rewritten based on Pytorch version of of Ho et al's diffusion models: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py +""" +import enum +import math + +import numpy as np +import paddle + +from .losses import discretized_gaussian_log_likelihood +from .losses import normal_kl +from .nn import mean_flat + + +def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): + """ + Get a pre-defined beta schedule for the given name. + + The beta schedule library consists of beta schedules which remain similar + in the limit of num_diffusion_timesteps. + Beta schedules may be added, but should not be removed or changed once + they are committed to maintain backwards compatibility. + """ + if schedule_name == "linear": + # Linear schedule from Ho et al, extended to work for any number of + # diffusion steps. 
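+        # Ho et al. used beta_1 = 1e-4 and beta_T = 0.02 for T = 1000 steps; scaling
+        # both endpoints by 1000 / T keeps the overall amount of noise roughly
+        # comparable when a different number of timesteps is requested.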
+ scale = 1000 / num_diffusion_timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + elif schedule_name == "cosine": + return betas_for_alpha_bar( + num_diffusion_timesteps, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2)**2, + ) + else: + raise NotImplementedError(f"unknown beta schedule: {schedule_name}") + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +class ModelMeanType(enum.Enum): + """ + Which type of output the model predicts. + """ + + PREVIOUS_X = enum.auto() # the model predicts x_{t-1} + START_X = enum.auto() # the model predicts x_0 + EPSILON = enum.auto() # the model predicts epsilon + + +class ModelVarType(enum.Enum): + """ + What is used as the model's output variance. + + The LEARNED_RANGE option has been added to allow the model to predict + values between FIXED_SMALL and FIXED_LARGE, making its job easier. + """ + + LEARNED = enum.auto() + FIXED_SMALL = enum.auto() + FIXED_LARGE = enum.auto() + LEARNED_RANGE = enum.auto() + + +class LossType(enum.Enum): + MSE = enum.auto() # use raw MSE loss (and KL when learning variances) + RESCALED_MSE = (enum.auto()) # use raw MSE loss (with RESCALED_KL when learning variances) + KL = enum.auto() # use the variational lower-bound + RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB + + def is_vb(self): + return self == LossType.KL or self == LossType.RESCALED_KL + + +class GaussianDiffusion: + """ + Utilities for training and sampling diffusion models. + + Ported directly from here, and then adapted over time to further experimentation. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 + + :param betas: a 1-D numpy array of betas for each diffusion timestep, + starting at T and going to 1. + :param model_mean_type: a ModelMeanType determining what the model outputs. + :param model_var_type: a ModelVarType determining how variance is output. + :param loss_type: a LossType determining the loss function to use. + :param rescale_timesteps: if True, pass floating point timesteps into the + model so that they are always scaled like in the + original paper (0 to 1000). + """ + + def __init__( + self, + *, + betas, + model_mean_type, + model_var_type, + loss_type, + rescale_timesteps=False, + ): + self.model_mean_type = model_mean_type + self.model_var_type = model_var_type + self.loss_type = loss_type + self.rescale_timesteps = rescale_timesteps + + # Use float64 for accuracy. 
+ betas = np.array(betas, dtype=np.float64) + self.betas = betas + assert len(betas.shape) == 1, "betas must be 1-D" + assert (betas > 0).all() and (betas <= 1).all() + + self.num_timesteps = int(betas.shape[0]) + + alphas = 1.0 - betas + self.alphas_cumprod = np.cumprod(alphas, axis=0) + self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) + self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) + assert self.alphas_cumprod_prev.shape == (self.num_timesteps, ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) + self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) + self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) + self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + self.posterior_variance = (betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + # log calculation clipped because the posterior variance is 0 at the + # beginning of the diffusion chain. + self.posterior_log_variance_clipped = np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:])) + self.posterior_mean_coef1 = (betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)) + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def q_sample(self, x_start, t, noise=None): + """ + Diffuse the data for a given number of diffusion steps. + + In other words, sample from q(x_t | x_0). + + :param x_start: the initial data batch. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :param noise: if specified, the split-out normal noise. + :return: A noisy version of x_start. 
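+
+        In closed form:
+
+            x_t = sqrt(alphas_cumprod[t]) * x_start + sqrt(1 - alphas_cumprod[t]) * noise
+
+        which is exactly what is computed below.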
+ """ + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + assert noise.shape == x_start.shape + return (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def q_posterior_mean_variance(self, x_start, x_t, t): + """ + Compute the mean and variance of the diffusion posterior: + + q(x_{t-1} | x_t, x_0) + + """ + assert x_start.shape == x_t.shape + posterior_mean = (_extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t) + posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + assert (posterior_mean.shape[0] == posterior_variance.shape[0] == posterior_log_variance_clipped.shape[0] == + x_start.shape[0]) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None): + """ + Apply the model to get p(x_{t-1} | x_t), as well as a prediction of + the initial x, x_0. + + :param model: the model, which takes a signal and a batch of timesteps + as input. + :param x: the [N x C x ...] tensor at time t. + :param t: a 1-D Tensor of timesteps. + :param clip_denoised: if True, clip the denoised signal into [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. Applies before + clip_denoised. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict with the following keys: + - 'mean': the model mean output. + - 'variance': the model variance output. + - 'log_variance': the log of 'variance'. + - 'pred_xstart': the prediction for x_0. + """ + if model_kwargs is None: + model_kwargs = {} + + B, C = x.shape[:2] + assert t.shape == [B] + model_output = model(x, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]: + assert model_output.shape == [B, C * 2, *x.shape[2:]] + model_output, model_var_values = paddle.split(model_output, 2, axis=1) + if self.model_var_type == ModelVarType.LEARNED: + model_log_variance = model_var_values + model_variance = paddle.exp(model_log_variance) + else: + min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape) + max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) + # The model_var_values is [-1, 1] for [min_var, max_var]. + frac = (model_var_values + 1) / 2 + model_log_variance = frac * max_log + (1 - frac) * min_log + model_variance = paddle.exp(model_log_variance) + else: + model_variance, model_log_variance = { + # for fixedlarge, we set the initial (log-)variance like so + # to get a better decoder log likelihood. 
+ ModelVarType.FIXED_LARGE: ( + np.append(self.posterior_variance[1], self.betas[1:]), + np.log(np.append(self.posterior_variance[1], self.betas[1:])), + ), + ModelVarType.FIXED_SMALL: ( + self.posterior_variance, + self.posterior_log_variance_clipped, + ), + }[self.model_var_type] + model_variance = _extract_into_tensor(model_variance, t, x.shape) + model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape) + + def process_xstart(x): + if denoised_fn is not None: + x = denoised_fn(x) + if clip_denoised: + return x.clamp(-1, 1) + return x + + if self.model_mean_type == ModelMeanType.PREVIOUS_X: + pred_xstart = process_xstart(self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)) + model_mean = model_output + elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: + if self.model_mean_type == ModelMeanType.START_X: + pred_xstart = process_xstart(model_output) + else: + pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)) + model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t) + else: + raise NotImplementedError(self.model_mean_type) + + assert (model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape) + return { + "mean": model_mean, + "variance": model_variance, + "log_variance": model_log_variance, + "pred_xstart": pred_xstart, + } + + def _predict_xstart_from_eps(self, x_t, t, eps): + assert x_t.shape == eps.shape + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps) + + def _predict_xstart_from_xprev(self, x_t, t, xprev): + assert x_t.shape == xprev.shape + return ( # (xprev - coef2*x_t) / coef1 + _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev - + _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t) + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + pred_xstart) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _scale_timesteps(self, t): + if self.rescale_timesteps: + return paddle.cast((t), 'float32') * (1000.0 / self.num_timesteps) + return t + + def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). + """ + gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_mean_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
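+
+        Concretely, the guided mean is: mean + variance * grad(log p(y|x)).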
+ """ + gradient = cond_fn(x, t, p_mean_var, **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, self._scale_timesteps(t), **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def condition_score_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, p_mean_var, **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def p_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. + :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"]} + + def p_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. 
+ :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean_with_grad(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"].detach()} + + def p_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model. + + :param model: the model module. + :param shape: the shape of the samples, (N, C, H, W). + :param noise: if specified, the noise from the encoder to sample. + Should be of the same shape as `shape`. + :param clip_denoised: if True, clip x_start predictions to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param device: if specified, the device to create the samples on. + If not specified, use a model parameter's device. + :param progress: if True, show a tqdm progress bar. + :return: a non-differentiable batch of samples. + """ + final = None + for sample in self.p_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def p_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model and yield intermediate samples from + each timestep of diffusion. + + Arguments are the same as p_sample_loop(). + Returns a generator over dicts, where each dict is the return value of + p_sample(). 
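+
+        A minimal usage sketch (names are illustrative):
+
+            for out in diffusion.p_sample_loop_progressive(model, (1, 3, 256, 256), progress=True):
+                img = out["sample"]  # intermediate sample at the current timestep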
+ """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + sample_fn = self.p_sample_with_grad if cond_fn_with_grad else self.p_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + ) + yield out + img = out["sample"] + + def ddim_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"]} + + def ddim_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + out["pred_xstart"] = out["pred_xstart"].detach() + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"].detach()} + + def ddim_reverse_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t+1} from the model using DDIM reverse ODE. + """ + assert eta == 0.0, "Reverse ODE only for deterministic path" + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x - + out["pred_xstart"]) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape) + alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape) + + # Equation 12. reversed + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_next) + paddle.sqrt(1 - alpha_bar_next) * eps) + + return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]} + + def ddim_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model using DDIM. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.ddim_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + eta=eta, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def ddim_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Use DDIM to sample from the model and yield intermediate samples from + each timestep of DDIM. + + Same usage as p_sample_loop_progressive(). 
+ """ + # if device is None: + # device = next(model.parameters()).device + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0]) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint( + low=0, + high=model.num_classes, + shape=model_kwargs['y'].shape, + ) + sample_fn = self.ddim_sample_with_grad if cond_fn_with_grad else self.ddim_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + eta=eta, + ) + yield out + img = out["sample"] + + def plms_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + cond_fn_with_grad=False, + order=2, + old_out=None, + ): + """ + Sample x_{t-1} from the model using Pseudo Linear Multistep. + + Same usage as p_sample(). + """ + if not int(order) or not 1 <= order <= 4: + raise ValueError('order is invalid (should be int from 1-4).') + + def get_model_output(x, t): + with paddle.set_grad_enabled(cond_fn_with_grad and cond_fn is not None): + x = x.detach().requires_grad_() if cond_fn_with_grad else x + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + if cond_fn_with_grad: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + x = x.detach() + else: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + return eps, out, out_orig + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + eps, out, out_orig = get_model_output(x, t) + + if order > 1 and old_out is None: + # Pseudo Improved Euler + old_eps = [eps] + mean_pred = out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps + eps_2, _, _ = get_model_output(mean_pred, t - 1) + eps_prime = (eps + eps_2) / 2 + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + else: + # Pseudo Linear Multistep (Adams-Bashforth) + old_eps = old_out["old_eps"] + old_eps.append(eps) + cur_order = min(order, len(old_eps)) + if cur_order == 1: + eps_prime = old_eps[-1] + elif cur_order == 2: + eps_prime = (3 * old_eps[-1] - old_eps[-2]) / 2 + elif cur_order == 3: + eps_prime = (23 * old_eps[-1] - 16 * old_eps[-2] + 5 * old_eps[-3]) / 12 + elif cur_order == 4: + eps_prime = (55 * old_eps[-1] - 59 * old_eps[-2] + 37 * old_eps[-3] - 9 * old_eps[-4]) / 24 + else: + raise RuntimeError('cur_order is invalid.') + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + + if len(old_eps) >= order: + old_eps.pop(0) + + nonzero_mask = paddle.cast((t != 0), 'float32').reshape([-1, *([1] * (len(x.shape) - 1))]) + sample = mean_pred * nonzero_mask + out["pred_xstart"] * (1 - nonzero_mask) + + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"], "old_eps": old_eps} + + def plms_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Generate samples from the model using Pseudo Linear Multistep. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.plms_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + ): + final = sample + return final["sample"] + + def plms_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Use PLMS to sample from the model and yield intermediate samples from each + timestep of PLMS. + + Same usage as p_sample_loop_progressive(). + """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. 
+ from tqdm.auto import tqdm + + indices = tqdm(indices) + + old_out = None + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + out = self.plms_sample( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + old_out=old_out, + ) + yield out + old_out = out + img = out["sample"] + + def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None): + """ + Get a term for the variational lower-bound. + + The resulting units are bits (rather than nats, as one might expect). + This allows for comparison to other papers. + + :return: a dict with the following keys: + - 'output': a shape [N] tensor of NLLs or KLs. + - 'pred_xstart': the x_0 predictions. + """ + true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t) + out = self.p_mean_variance(model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs) + kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]) + kl = mean_flat(kl) / np.log(2.0) + + decoder_nll = -discretized_gaussian_log_likelihood( + x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]) + assert decoder_nll.shape == x_start.shape + decoder_nll = mean_flat(decoder_nll) / np.log(2.0) + + # At the first timestep return the decoder NLL, + # otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)) + output = paddle.where((t == 0), decoder_nll, kl) + return {"output": output, "pred_xstart": out["pred_xstart"]} + + def training_losses(self, model, x_start, t, model_kwargs=None, noise=None): + """ + Compute training losses for a single timestep. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param t: a batch of timestep indices. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param noise: if specified, the specific Gaussian noise to try to remove. + :return: a dict with the key "loss" containing a tensor of shape [N]. + Some mean or variance settings may also have other keys. + """ + if model_kwargs is None: + model_kwargs = {} + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start, t, noise=noise) + + terms = {} + + if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL: + terms["loss"] = self._vb_terms_bpd( + model=model, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + model_kwargs=model_kwargs, + )["output"] + if self.loss_type == LossType.RESCALED_KL: + terms["loss"] *= self.num_timesteps + elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: + model_output = model(x_t, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ + ModelVarType.LEARNED, + ModelVarType.LEARNED_RANGE, + ]: + B, C = x_t.shape[:2] + assert model_output.shape == (B, C * 2, *x_t.shape[2:]) + model_output, model_var_values = paddle.split(model_output, 2, dim=1) + # Learn the variance using the variational bound, but don't let + # it affect our mean prediction. 
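+                # detach() blocks gradients from the VLB term into the mean prediction,
+                # so only model_var_values is trained through terms["vb"].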
+ frozen_out = paddle.concat([model_output.detach(), model_var_values], axis=1) + terms["vb"] = self._vb_terms_bpd( + model=lambda *args, r=frozen_out: r, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + )["output"] + if self.loss_type == LossType.RESCALED_MSE: + # Divide by 1000 for equivalence with initial implementation. + # Without a factor of 1/1000, the VB term hurts the MSE term. + terms["vb"] *= self.num_timesteps / 1000.0 + + target = { + ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0], + ModelMeanType.START_X: x_start, + ModelMeanType.EPSILON: noise, + }[self.model_mean_type] + assert model_output.shape == target.shape == x_start.shape + terms["mse"] = mean_flat((target - model_output)**2) + if "vb" in terms: + terms["loss"] = terms["mse"] + terms["vb"] + else: + terms["loss"] = terms["mse"] + else: + raise NotImplementedError(self.loss_type) + + return terms + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + + This term can't be optimized, as it only depends on the encoder. + + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = paddle.to_tensor([self.num_timesteps - 1] * batch_size, place=x_start.place) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None): + """ + Compute the entire variational lower-bound, measured in bits-per-dim, + as well as other related quantities. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param clip_denoised: if True, clip denoised samples. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + + :return: a dict containing the following keys: + - total_bpd: the total variational lower-bound, per batch element. + - prior_bpd: the prior term in the lower-bound. + - vb: an [N x T] tensor of terms in the lower-bound. + - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep. + - mse: an [N x T] tensor of epsilon MSEs for each timestep. 
+ """ + device = x_start.place + batch_size = x_start.shape[0] + + vb = [] + xstart_mse = [] + mse = [] + for t in list(range(self.num_timesteps))[::-1]: + t_batch = paddle.to_tensor([t] * batch_size, place=device) + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise) + # Calculate VLB term at the current timestep + # with paddle.no_grad(): + out = self._vb_terms_bpd( + model, + x_start=x_start, + x_t=x_t, + t=t_batch, + clip_denoised=clip_denoised, + model_kwargs=model_kwargs, + ) + vb.append(out["output"]) + xstart_mse.append(mean_flat((out["pred_xstart"] - x_start)**2)) + eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"]) + mse.append(mean_flat((eps - noise)**2)) + + vb = paddle.stack(vb, axis=1) + xstart_mse = paddle.stack(xstart_mse, axis=1) + mse = paddle.stack(mse, axis=1) + + prior_bpd = self._prior_bpd(x_start) + total_bpd = vb.sum(axis=1) + prior_bpd + return { + "total_bpd": total_bpd, + "prior_bpd": prior_bpd, + "vb": vb, + "xstart_mse": xstart_mse, + "mse": mse, + } + + +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + res = paddle.to_tensor(arr, place=timesteps.place)[timesteps] + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/losses.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/losses.py new file mode 100755 index 000000000..5c3970de5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/losses.py @@ -0,0 +1,86 @@ +""" +Helpers for various likelihood-based losses implemented by Paddle. These are ported from the original +Ho et al. diffusion models codebase: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py +""" +import numpy as np +import paddle +import paddle.nn.functional as F + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + Compute the KL divergence between two gaussians. + + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, paddle.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for th.exp(). + logvar1, logvar2 = [x if isinstance(x, paddle.Tensor) else paddle.to_tensor(x) for x in (logvar1, logvar2)] + + return 0.5 * (-1.0 + logvar2 - logvar1 + paddle.exp(logvar1 - logvar2) + + ((mean1 - mean2)**2) * paddle.exp(-logvar2)) + + +def approx_standard_normal_cdf(x): + """ + A fast approximation of the cumulative distribution function of the + standard normal. 
+ """ + return 0.5 * (1.0 + paddle.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def discretized_gaussian_log_likelihood(x, *, means, log_scales): + """ + Compute the log-likelihood of a Gaussian distribution discretizing to a + given image. + + :param x: the target images. It is assumed that this was uint8 values, + rescaled to the range [-1, 1]. + :param means: the Gaussian mean Tensor. + :param log_scales: the Gaussian log stddev Tensor. + :return: a tensor like x of log probabilities (in nats). + """ + assert x.shape == means.shape == log_scales.shape + centered_x = x - means + inv_stdv = paddle.exp(-log_scales) + plus_in = inv_stdv * (centered_x + 1.0 / 255.0) + cdf_plus = approx_standard_normal_cdf(plus_in) + min_in = inv_stdv * (centered_x - 1.0 / 255.0) + cdf_min = approx_standard_normal_cdf(min_in) + log_cdf_plus = paddle.log(cdf_plus.clip(min=1e-12)) + log_one_minus_cdf_min = paddle.log((1.0 - cdf_min).clip(min=1e-12)) + cdf_delta = cdf_plus - cdf_min + log_probs = paddle.where( + x < -0.999, + log_cdf_plus, + paddle.where(x > 0.999, log_one_minus_cdf_min, paddle.log(cdf_delta.clip(min=1e-12))), + ) + assert log_probs.shape == x.shape + return log_probs + + +def spherical_dist_loss(x, y): + x = F.normalize(x, axis=-1) + y = F.normalize(y, axis=-1) + return (x - y).norm(axis=-1).divide(paddle.to_tensor(2.0)).asin().pow(2).multiply(paddle.to_tensor(2.0)) + + +def tv_loss(input): + """L2 total variation loss, as in Mahendran et al.""" + input = F.pad(input, (0, 1, 0, 1), 'replicate') + x_diff = input[..., :-1, 1:] - input[..., :-1, :-1] + y_diff = input[..., 1:, :-1] - input[..., :-1, :-1] + return (x_diff**2 + y_diff**2).mean([1, 2, 3]) + + +def range_loss(input): + return (input - input.clip(-1, 1)).pow(2).mean([1, 2, 3]) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/make_cutouts.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/make_cutouts.py new file mode 100755 index 000000000..babaedfb9 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/make_cutouts.py @@ -0,0 +1,177 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/nn/make_cutouts.py +''' +import math + +import paddle +import paddle.nn as nn +from disco_diffusion_ernievil_base.resize_right.resize_right import resize +from paddle.nn import functional as F + +from . 
import transforms as T + +skip_augs = False # @param{type: 'boolean'} + + +def sinc(x): + return paddle.where(x != 0, paddle.sin(math.pi * x) / (math.pi * x), x.new_ones([])) + + +def lanczos(x, a): + cond = paddle.logical_and(-a < x, x < a) + out = paddle.where(cond, sinc(x) * sinc(x / a), x.new_zeros([])) + return out / out.sum() + + +def ramp(ratio, width): + n = math.ceil(width / ratio + 1) + out = paddle.empty([n]) + cur = 0 + for i in range(out.shape[0]): + out[i] = cur + cur += ratio + return paddle.concat([-out[1:].flip([0]), out])[1:-1] + + +class MakeCutouts(nn.Layer): + + def __init__(self, cut_size, cutn, skip_augs=False): + super().__init__() + self.cut_size = cut_size + self.cutn = cutn + self.skip_augs = skip_augs + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine(degrees=15, translate=(0.1, 0.1)), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomPerspective(distortion_scale=0.4, p=0.7), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.15), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + input = T.Pad(input.shape[2] // 4, fill=0)(input) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + + cutouts = [] + for ch in range(self.cutn): + if ch > self.cutn - self.cutn // 4: + cutout = input.clone() + else: + size = int(max_size * + paddle.zeros(1, ).normal_(mean=0.8, std=0.3).clip(float(self.cut_size / max_size), 1.0)) + offsetx = paddle.randint(0, abs(sideX - size + 1), ()) + offsety = paddle.randint(0, abs(sideY - size + 1), ()) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + + if not self.skip_augs: + cutout = self.augs(cutout) + cutouts.append(resample(cutout, (self.cut_size, self.cut_size))) + del cutout + + cutouts = paddle.concat(cutouts, axis=0) + return cutouts + + +class MakeCutoutsDango(nn.Layer): + + def __init__(self, cut_size, Overview=4, InnerCrop=0, IC_Size_Pow=0.5, IC_Grey_P=0.2): + super().__init__() + self.cut_size = cut_size + self.Overview = Overview + self.InnerCrop = InnerCrop + self.IC_Size_Pow = IC_Size_Pow + self.IC_Grey_P = IC_Grey_P + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine( + degrees=10, + translate=(0.05, 0.05), + interpolation=T.InterpolationMode.BILINEAR, + ), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.1), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + cutouts = [] + gray = T.Grayscale(3) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + output_shape = [1, 3, self.cut_size, self.cut_size] + pad_input = F.pad( + input, + ( + (sideY - max_size) // 2, + (sideY - max_size) // 2, + (sideX - max_size) // 2, + (sideX - max_size) // 2, + ), + **padargs, + ) + cutout = resize(pad_input, out_shape=output_shape) + + if self.Overview > 0: + if self.Overview <= 4: + if self.Overview >= 1: + cutouts.append(cutout) + if self.Overview >= 2: + cutouts.append(gray(cutout)) + if self.Overview >= 3: + cutouts.append(cutout[:, :, :, ::-1]) + if self.Overview == 4: + cutouts.append(gray(cutout[:, :, :, ::-1])) + else: + cutout = resize(pad_input, out_shape=output_shape) + 
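+                # Overview > 4: fall back to repeating the plain full-frame cutout
+                # Overview times (no grayscale / flipped variants).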
for _ in range(self.Overview): + cutouts.append(cutout) + + if self.InnerCrop > 0: + for i in range(self.InnerCrop): + size = int(paddle.rand([1])**self.IC_Size_Pow * (max_size - min_size) + min_size) + offsetx = paddle.randint(0, sideX - size + 1) + offsety = paddle.randint(0, sideY - size + 1) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + if i <= int(self.IC_Grey_P * self.InnerCrop): + cutout = gray(cutout) + cutout = resize(cutout, out_shape=output_shape) + cutouts.append(cutout) + + cutouts = paddle.concat(cutouts) + if skip_augs is not True: + cutouts = self.augs(cutouts) + return cutouts + + +def resample(input, size, align_corners=True): + n, c, h, w = input.shape + dh, dw = size + + input = input.reshape([n * c, 1, h, w]) + + if dh < h: + kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype) + pad_h = (kernel_h.shape[0] - 1) // 2 + input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect') + input = F.conv2d(input, kernel_h[None, None, :, None]) + + if dw < w: + kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype) + pad_w = (kernel_w.shape[0] - 1) // 2 + input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect') + input = F.conv2d(input, kernel_w[None, None, None, :]) + + input = input.reshape([n, c, h, w]) + return F.interpolate(input, size, mode='bicubic', align_corners=align_corners) + + +padargs = {} diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/nn.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/nn.py new file mode 100755 index 000000000..d618183e2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/nn.py @@ -0,0 +1,127 @@ +""" +Various utilities for neural networks implemented by Paddle. This code is rewritten based on: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py +""" +import math + +import paddle +import paddle.nn as nn + + +class SiLU(nn.Layer): + + def forward(self, x): + return x * nn.functional.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + + def forward(self, x): + return super().forward(x) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1D(*args, **kwargs) + elif dims == 2: + return nn.Conv2D(*args, **kwargs) + elif dims == 3: + return nn.Conv3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1D(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2D(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
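+
+    In this codebase it is applied to the final convolution of residual and
+    attention blocks so that those blocks initially behave like identity
+    (skip) mappings.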
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(axis=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = paddle.exp(-math.log(max_period) * paddle.arange(start=0, end=half, dtype=paddle.float32) / half) + args = paddle.cast(timesteps[:, None], 'float32') * freqs[None] + embedding = paddle.concat([paddle.cos(args), paddle.sin(args)], axis=-1) + if dim % 2: + embedding = paddle.concat([embedding, paddle.zeros_like(embedding[:, :1])], axis=-1) + return embedding + + +def checkpoint(func, inputs, params, flag): + """ + This function is disabled. And now just forward. + """ + return func(*inputs) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/perlin_noises.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/perlin_noises.py new file mode 100755 index 000000000..6dacb331b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/perlin_noises.py @@ -0,0 +1,78 @@ +''' +Perlin noise implementation by Paddle. 
+This code is rewritten based on: +https://github.com/jina-ai/discoart/blob/main/discoart/nn/perlin_noises.py +''' +import numpy as np +import paddle +import paddle.vision.transforms as TF +from PIL import Image +from PIL import ImageOps + + +def interp(t): + return 3 * t**2 - 2 * t**3 + + +def perlin(width, height, scale=10): + gx, gy = paddle.randn([2, width + 1, height + 1, 1, 1]) + xs = paddle.linspace(0, 1, scale + 1)[:-1, None] + ys = paddle.linspace(0, 1, scale + 1)[None, :-1] + wx = 1 - interp(xs) + wy = 1 - interp(ys) + dots = 0 + dots += wx * wy * (gx[:-1, :-1] * xs + gy[:-1, :-1] * ys) + dots += (1 - wx) * wy * (-gx[1:, :-1] * (1 - xs) + gy[1:, :-1] * ys) + dots += wx * (1 - wy) * (gx[:-1, 1:] * xs - gy[:-1, 1:] * (1 - ys)) + dots += (1 - wx) * (1 - wy) * (-gx[1:, 1:] * (1 - xs) - gy[1:, 1:] * (1 - ys)) + return dots.transpose([0, 2, 1, 3]).reshape([width * scale, height * scale]) + + +def perlin_ms(octaves, width, height, grayscale): + out_array = [0.5] if grayscale else [0.5, 0.5, 0.5] + # out_array = [0.0] if grayscale else [0.0, 0.0, 0.0] + for i in range(1 if grayscale else 3): + scale = 2**len(octaves) + oct_width = width + oct_height = height + for oct in octaves: + p = perlin(oct_width, oct_height, scale) + out_array[i] += p * oct + scale //= 2 + oct_width *= 2 + oct_height *= 2 + return paddle.concat(out_array) + + +def create_perlin_noise(octaves, width, height, grayscale, side_y, side_x): + out = perlin_ms(octaves, width, height, grayscale) + if grayscale: + out = TF.resize(size=(side_y, side_x), img=out.numpy()) + out = np.uint8(out) + out = Image.fromarray(out).convert('RGB') + else: + out = out.reshape([-1, 3, out.shape[0] // 3, out.shape[1]]) + out = out.squeeze().transpose([1, 2, 0]).numpy() + out = TF.resize(size=(side_y, side_x), img=out) + out = out.clip(0, 1) * 255 + out = np.uint8(out) + out = Image.fromarray(out) + + out = ImageOps.autocontrast(out) + return out + + +def regen_perlin(perlin_mode, side_y, side_x, batch_size): + if perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + + init = (TF.to_tensor(init).add(TF.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + return init.expand([batch_size, -1, -1, -1]) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/respace.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/respace.py new file mode 100755 index 000000000..c001c70d0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/respace.py @@ -0,0 +1,123 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/respace.py +''' +import numpy as np +import paddle + +from .gaussian_diffusion import GaussianDiffusion + + +def space_timesteps(num_timesteps, section_counts): + """ + Create a list of timesteps to use from an original diffusion process, + given the number of 
timesteps we want to take from equally-sized portions + of the original process. + + For example, if there's 300 timesteps and the section counts are [10,15,20] + then the first 100 timesteps are strided to be 10 timesteps, the second 100 + are strided to be 15 timesteps, and the final 100 are strided to be 20. + + If the stride is a string starting with "ddim", then the fixed striding + from the DDIM paper is used, and only one section is allowed. + + :param num_timesteps: the number of diffusion steps in the original + process to divide up. + :param section_counts: either a list of numbers, or a string containing + comma-separated numbers, indicating the step count + per section. As a special case, use "ddimN" where N + is a number of steps to use the striding from the + DDIM paper. + :return: a set of diffusion steps from the original process to use. + """ + if isinstance(section_counts, str): + if section_counts.startswith("ddim"): + desired_count = int(section_counts[len("ddim"):]) + for i in range(1, num_timesteps): + if len(range(0, num_timesteps, i)) == desired_count: + return set(range(0, num_timesteps, i)) + raise ValueError(f"cannot create exactly {num_timesteps} steps with an integer stride") + section_counts = [int(x) for x in section_counts.split(",")] + size_per = num_timesteps // len(section_counts) + extra = num_timesteps % len(section_counts) + start_idx = 0 + all_steps = [] + for i, section_count in enumerate(section_counts): + size = size_per + (1 if i < extra else 0) + if size < section_count: + raise ValueError(f"cannot divide section of {size} steps into {section_count}") + if section_count <= 1: + frac_stride = 1 + else: + frac_stride = (size - 1) / (section_count - 1) + cur_idx = 0.0 + taken_steps = [] + for _ in range(section_count): + taken_steps.append(start_idx + round(cur_idx)) + cur_idx += frac_stride + all_steps += taken_steps + start_idx += size + return set(all_steps) + + +class SpacedDiffusion(GaussianDiffusion): + """ + A diffusion process which can skip steps in a base diffusion process. + + :param use_timesteps: a collection (sequence or set) of timesteps from the + original diffusion process to retain. + :param kwargs: the kwargs to create the base diffusion process. 
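+
+    A typical construction (a sketch mirroring ``create_gaussian_diffusion``
+    in ``script_util.py``) looks like::
+
+        betas = gd.get_named_beta_schedule("linear", 1000)
+        diffusion = SpacedDiffusion(
+            use_timesteps=space_timesteps(1000, "ddim50"),
+            betas=betas,
+            model_mean_type=gd.ModelMeanType.EPSILON,
+            model_var_type=gd.ModelVarType.FIXED_LARGE,
+            loss_type=gd.LossType.MSE,
+            rescale_timesteps=False,
+        )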
+ """ + + def __init__(self, use_timesteps, **kwargs): + self.use_timesteps = set(use_timesteps) + self.timestep_map = [] + self.original_num_steps = len(kwargs["betas"]) + + base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa + last_alpha_cumprod = 1.0 + new_betas = [] + for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): + if i in self.use_timesteps: + new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) + last_alpha_cumprod = alpha_cumprod + self.timestep_map.append(i) + kwargs["betas"] = np.array(new_betas) + super().__init__(**kwargs) + + def p_mean_variance(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) + + def training_losses(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().training_losses(self._wrap_model(model), *args, **kwargs) + + def condition_mean(self, cond_fn, *args, **kwargs): + return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) + + def condition_score(self, cond_fn, *args, **kwargs): + return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) + + def _wrap_model(self, model): + if isinstance(model, _WrappedModel): + return model + return _WrappedModel(model, self.timestep_map, self.rescale_timesteps, self.original_num_steps) + + def _scale_timesteps(self, t): + # Scaling is done by the wrapped model. + return t + + +class _WrappedModel: + + def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): + self.model = model + self.timestep_map = timestep_map + self.rescale_timesteps = rescale_timesteps + self.original_num_steps = original_num_steps + + def __call__(self, x, ts, **kwargs): + map_tensor = paddle.to_tensor(self.timestep_map, place=ts.place, dtype=ts.dtype) + new_ts = map_tensor[ts] + if self.rescale_timesteps: + new_ts = paddle.cast(new_ts, 'float32') * (1000.0 / self.original_num_steps) + return self.model(x, new_ts, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/script_util.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/script_util.py new file mode 100755 index 000000000..d728a5430 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/script_util.py @@ -0,0 +1,201 @@ +''' +This code is based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/script_util.py +''' +import argparse +import inspect + +from . import gaussian_diffusion as gd +from .respace import space_timesteps +from .respace import SpacedDiffusion +from .unet import EncoderUNetModel +from .unet import SuperResModel +from .unet import UNetModel + +NUM_CLASSES = 1000 + + +def diffusion_defaults(): + """ + Defaults for image and classifier training. + """ + return dict( + learn_sigma=False, + diffusion_steps=1000, + noise_schedule="linear", + timestep_respacing="", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + ) + + +def model_and_diffusion_defaults(): + """ + Defaults for image training. 
+ """ + res = dict( + image_size=64, + num_channels=128, + num_res_blocks=2, + num_heads=4, + num_heads_upsample=-1, + num_head_channels=-1, + attention_resolutions="16,8", + channel_mult="", + dropout=0.0, + class_cond=False, + use_checkpoint=False, + use_scale_shift_norm=True, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, + ) + res.update(diffusion_defaults()) + return res + + +def create_model_and_diffusion( + image_size, + class_cond, + learn_sigma, + num_channels, + num_res_blocks, + channel_mult, + num_heads, + num_head_channels, + num_heads_upsample, + attention_resolutions, + dropout, + diffusion_steps, + noise_schedule, + timestep_respacing, + use_kl, + predict_xstart, + rescale_timesteps, + rescale_learned_sigmas, + use_checkpoint, + use_scale_shift_norm, + resblock_updown, + use_fp16, + use_new_attention_order, +): + model = create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult=channel_mult, + learn_sigma=learn_sigma, + class_cond=class_cond, + use_checkpoint=use_checkpoint, + attention_resolutions=attention_resolutions, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + resblock_updown=resblock_updown, + use_fp16=use_fp16, + use_new_attention_order=use_new_attention_order, + ) + diffusion = create_gaussian_diffusion( + steps=diffusion_steps, + learn_sigma=learn_sigma, + noise_schedule=noise_schedule, + use_kl=use_kl, + predict_xstart=predict_xstart, + rescale_timesteps=rescale_timesteps, + rescale_learned_sigmas=rescale_learned_sigmas, + timestep_respacing=timestep_respacing, + ) + return model, diffusion + + +def create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult="", + learn_sigma=False, + class_cond=False, + use_checkpoint=False, + attention_resolutions="16", + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + dropout=0, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, +): + if channel_mult == "": + if image_size == 512: + channel_mult = (0.5, 1, 1, 2, 2, 4, 4) + elif image_size == 256: + channel_mult = (1, 1, 2, 2, 4, 4) + elif image_size == 128: + channel_mult = (1, 1, 2, 3, 4) + elif image_size == 64: + channel_mult = (1, 2, 3, 4) + else: + raise ValueError(f"unsupported image size: {image_size}") + else: + channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) + + attention_ds = [] + for res in attention_resolutions.split(","): + attention_ds.append(image_size // int(res)) + + return UNetModel( + image_size=image_size, + in_channels=3, + model_channels=num_channels, + out_channels=(3 if not learn_sigma else 6), + num_res_blocks=num_res_blocks, + attention_resolutions=tuple(attention_ds), + dropout=dropout, + channel_mult=channel_mult, + num_classes=(NUM_CLASSES if class_cond else None), + use_checkpoint=use_checkpoint, + use_fp16=use_fp16, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + resblock_updown=resblock_updown, + use_new_attention_order=use_new_attention_order, + ) + + +def create_gaussian_diffusion( + *, + steps=1000, + learn_sigma=False, + sigma_small=False, + noise_schedule="linear", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + timestep_respacing="", +): + betas = gd.get_named_beta_schedule(noise_schedule, steps) + if 
use_kl: + loss_type = gd.LossType.RESCALED_KL + elif rescale_learned_sigmas: + loss_type = gd.LossType.RESCALED_MSE + else: + loss_type = gd.LossType.MSE + if not timestep_respacing: + timestep_respacing = [steps] + return SpacedDiffusion( + use_timesteps=space_timesteps(steps, timestep_respacing), + betas=betas, + model_mean_type=(gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X), + model_var_type=((gd.ModelVarType.FIXED_LARGE if not sigma_small else gd.ModelVarType.FIXED_SMALL) + if not learn_sigma else gd.ModelVarType.LEARNED_RANGE), + loss_type=loss_type, + rescale_timesteps=rescale_timesteps, + ) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/sec_diff.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/sec_diff.py new file mode 100755 index 000000000..1e361f18f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/sec_diff.py @@ -0,0 +1,135 @@ +''' +This code is rewritten by Paddle based on +https://github.com/jina-ai/discoart/blob/main/discoart/nn/sec_diff.py +''' +import math +from dataclasses import dataclass +from functools import partial + +import paddle +import paddle.nn as nn + + +@dataclass +class DiffusionOutput: + v: paddle.Tensor + pred: paddle.Tensor + eps: paddle.Tensor + + +class SkipBlock(nn.Layer): + + def __init__(self, main, skip=None): + super().__init__() + self.main = nn.Sequential(*main) + self.skip = skip if skip else nn.Identity() + + def forward(self, input): + return paddle.concat([self.main(input), self.skip(input)], axis=1) + + +def append_dims(x, n): + return x[(Ellipsis, *(None, ) * (n - x.ndim))] + + +def expand_to_planes(x, shape): + return paddle.tile(append_dims(x, len(shape)), [1, 1, *shape[2:]]) + + +def alpha_sigma_to_t(alpha, sigma): + return paddle.atan2(sigma, alpha) * 2 / math.pi + + +def t_to_alpha_sigma(t): + return paddle.cos(t * math.pi / 2), paddle.sin(t * math.pi / 2) + + +class SecondaryDiffusionImageNet2(nn.Layer): + + def __init__(self): + super().__init__() + c = 64 # The base channel count + cs = [c, c * 2, c * 2, c * 4, c * 4, c * 8] + + self.timestep_embed = FourierFeatures(1, 16) + self.down = nn.AvgPool2D(2) + self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + + self.net = nn.Sequential( + ConvBlock(3 + 16, cs[0]), + ConvBlock(cs[0], cs[0]), + SkipBlock([ + self.down, + ConvBlock(cs[0], cs[1]), + ConvBlock(cs[1], cs[1]), + SkipBlock([ + self.down, + ConvBlock(cs[1], cs[2]), + ConvBlock(cs[2], cs[2]), + SkipBlock([ + self.down, + ConvBlock(cs[2], cs[3]), + ConvBlock(cs[3], cs[3]), + SkipBlock([ + self.down, + ConvBlock(cs[3], cs[4]), + ConvBlock(cs[4], cs[4]), + SkipBlock([ + self.down, + ConvBlock(cs[4], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[4]), + self.up, + ]), + ConvBlock(cs[4] * 2, cs[4]), + ConvBlock(cs[4], cs[3]), + self.up, + ]), + ConvBlock(cs[3] * 2, cs[3]), + ConvBlock(cs[3], cs[2]), + self.up, + ]), + ConvBlock(cs[2] * 2, cs[2]), + ConvBlock(cs[2], cs[1]), + self.up, + ]), + ConvBlock(cs[1] * 2, cs[1]), + ConvBlock(cs[1], cs[0]), + self.up, + ]), + ConvBlock(cs[0] * 2, cs[0]), + nn.Conv2D(cs[0], 3, 3, padding=1), + ) + + def forward(self, input, t): + timestep_embed = expand_to_planes(self.timestep_embed(t[:, None]), input.shape) + v = self.net(paddle.concat([input, timestep_embed], axis=1)) + alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t)) + pred = input * alphas - 
v * sigmas + eps = input * sigmas + v * alphas + return DiffusionOutput(v, pred, eps) + + +class FourierFeatures(nn.Layer): + + def __init__(self, in_features, out_features, std=1.0): + super().__init__() + assert out_features % 2 == 0 + # self.weight = nn.Parameter(paddle.randn([out_features // 2, in_features]) * std) + self.weight = paddle.create_parameter([out_features // 2, in_features], + dtype='float32', + default_initializer=nn.initializer.Normal(mean=0.0, std=std)) + + def forward(self, input): + f = 2 * math.pi * input @ self.weight.T + return paddle.concat([f.cos(), f.sin()], axis=-1) + + +class ConvBlock(nn.Sequential): + + def __init__(self, c_in, c_out): + super().__init__( + nn.Conv2D(c_in, c_out, 3, padding=1), + nn.ReLU(), + ) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/transforms.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/transforms.py new file mode 100755 index 000000000..e0b620b01 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/transforms.py @@ -0,0 +1,757 @@ +''' +This code is rewritten by Paddle based on +https://github.com/pytorch/vision/blob/main/torchvision/transforms/transforms.py +''' +import math +import numbers +import warnings +from enum import Enum +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn.functional import grid_sample +from paddle.vision import transforms as T + + +class Normalize(nn.Layer): + + def __init__(self, mean, std): + super(Normalize, self).__init__() + self.mean = paddle.to_tensor(mean) + self.std = paddle.to_tensor(std) + + def forward(self, tensor: Tensor): + dtype = tensor.dtype + mean = paddle.to_tensor(self.mean, dtype=dtype) + std = paddle.to_tensor(self.std, dtype=dtype) + mean = mean.reshape([1, -1, 1, 1]) + std = std.reshape([1, -1, 1, 1]) + result = tensor.subtract(mean).divide(std) + return result + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. 
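+    Only ``nearest`` and ``bilinear`` are used by the tensor transforms in
+    this module.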
+ """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +class Grayscale(nn.Layer): + + def __init__(self, num_output_channels): + super(Grayscale, self).__init__() + self.num_output_channels = num_output_channels + + def forward(self, x): + output = (0.2989 * x[:, 0:1, :, :] + 0.587 * x[:, 1:2, :, :] + 0.114 * x[:, 2:3, :, :]) + if self.num_output_channels == 3: + return output.expand(x.shape) + + return output + + +class Lambda(nn.Layer): + + def __init__(self, func): + super(Lambda, self).__init__() + self.transform = func + + def forward(self, x): + return self.transform(x) + + +class RandomGrayscale(nn.Layer): + + def __init__(self, p): + super(RandomGrayscale, self).__init__() + self.prob = p + self.transform = Grayscale(3) + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return self.transform(x) + else: + return x + + +class RandomHorizontalFlip(nn.Layer): + + def __init__(self, prob): + super(RandomHorizontalFlip, self).__init__() + self.prob = prob + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return x[:, :, :, ::-1] + else: + return x + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 + return (ratio * img1 + (1.0 - ratio) * img2).clip(0, bound) + + +def trunc_div(a, b): + ipt = paddle.divide(a, b) + sign_ipt = paddle.sign(ipt) + abs_ipt = paddle.abs(ipt) + abs_ipt = paddle.floor(abs_ipt) + out = paddle.multiply(sign_ipt, abs_ipt) + return out + + +def fmod(a, b): + return a - trunc_div(a, b) * b + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(axis=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = paddle.max(img, axis=-3) + minc = paddle.min(img, axis=-3) + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = paddle.ones_like(maxc) + s = cr / paddle.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. 
+ cr_divisor = paddle.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r).cast('float32') * (bc - gc) + hg = ((maxc == g) & (maxc != r)).cast('float32') * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)).cast('float32') * (4.0 + gc - rc) + h = hr + hg + hb + h = fmod((h / 6.0 + 1.0), paddle.to_tensor(1.0)) + return paddle.stack((h, s, maxc), axis=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(axis=-3) + i = paddle.floor(h * 6.0) + f = (h * 6.0) - i + i = i.cast(dtype='int32') + + p = paddle.clip((v * (1.0 - s)), 0.0, 1.0) + q = paddle.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = paddle.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(axis=-3) == paddle.arange(6).reshape([-1, 1, 1]) + + a1 = paddle.stack((v, q, p, p, t, v), axis=-3) + a2 = paddle.stack((t, v, v, q, p, p), axis=-3) + a3 = paddle.stack((p, p, t, v, v, q), axis=-3) + a4 = paddle.stack((a1, a2, a3), axis=-4) + + return paddle.einsum("...ijk, ...xijk -> ...xjk", mask.cast(dtype=img.dtype), a4) + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + return _blend(img, paddle.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + c = img.shape[1] + + if c == 3: + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + mean = paddle.mean(output, axis=(-3, -2, -1), keepdim=True) + + else: + mean = paddle.mean(img, axis=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + img = _rgb2hsv(img) + h, s, v = img.unbind(axis=-3) + h = fmod(h + hue_factor, paddle.to_tensor(1.0)) + img = paddle.stack((h, s, v), axis=-3) + img_hue_adj = _hsv2rgb(img) + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + + return _blend(img, output, saturation_factor) + + +class ColorJitter(nn.Layer): + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super(ColorJitter, self).__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be 
a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = paddle.randperm(4) + + b = None if brightness is None else paddle.empty([1]).uniform_(brightness[0], brightness[1]) + c = None if contrast is None else paddle.empty([1]).uniform_(contrast[0], contrast[1]) + s = None if saturation is None else paddle.empty([1]).uniform_(saturation[0], saturation[1]) + h = None if hue is None else paddle.empty([1]).uniform_(hue[0], hue[1]) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
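+
+        The brightness, contrast, saturation and hue adjustments are applied
+        in a random order sampled by ``get_params``.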
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = adjust_hue(img, hue_factor) + + return img + + def __repr__(self) -> str: + s = (f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})") + return s + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand([img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + img = paddle.concat((img, dummy), axis=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = paddle.to_tensor(fill, dtype=img.dtype).reshape([1, len_fill, 1, 1]).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = paddle.empty([1, oh, ow, 3], dtype=theta.dtype) + x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, num=ow) + base_grid[..., 0] = (x_grid) + y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, num=oh).unsqueeze_(-1) + base_grid[..., 1] = (y_grid) + base_grid[..., 2] = 1.0 + rescaled_theta = theta.transpose([0, 2, 1]) / paddle.to_tensor([0.5 * w, 0.5 * h], dtype=theta.dtype) + output_grid = base_grid.reshape([1, oh * ow, 3]).bmm(rescaled_theta) + return output_grid.reshape([1, oh, ow, 2]) + + +def affine_impl(img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + fill: Optional[List[float]] = None) -> Tensor: + theta = paddle.to_tensor(matrix, dtype=img.dtype).reshape([1, 2, 3]) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_inverse_affine_matrix(center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], + inverted: bool = True) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # Pillow 
requires inverse affine transformation matrix: + # Affine matrix is : M = T * C * RotateScaleShear * C^-1 + # + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RotateScaleShear is rotation with scale and shear matrix + # + # RotateScaleShear(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + if inverted: + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + else: + matrix = [a, b, 0.0, c, d, 0.0] + matrix = [x * scale for x in matrix] + # Apply inverse of center translation: RSS * C^-1 + matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) + matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) + # Apply translation and center : T * C * RSS * C^-1 + matrix[2] += cx + tx + matrix[5] += cy + ty + + return matrix + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, + center: Optional[List[int]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + + Returns: + PIL Image or Tensor: Transformed image. + """ + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + center_f = [0.0, 0.0] + if center is not None: + _, height, width = img.shape[0], img.shape[1], img.shape[2] + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) + return affine_impl(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +def _check_sequence_input(x, name, req_sizes): + msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + if not isinstance(x, Sequence): + raise TypeError(f"{name} should be a sequence of length {msg}.") + if len(x) not in req_sizes: + raise ValueError(f"{name} should be sequence of length {msg}.") + + +def _setup_angle(x, name, req_sizes=(2, )): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError(f"If {name} is a single number, it must be positive.") + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x] + + +class RandomAffine(nn.Layer): + """Random affine transformation of the image keeping center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. 
warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + center=None, + ): + super(RandomAffine, self).__init__() + if resample is not None: + warnings.warn("The parameter 'resample' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'interpolation' instead.") + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2, )) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2, )) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2, )) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2, )) + + self.center = center + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(paddle.empty([1]).uniform_(float(degrees[0]), float(degrees[1]))) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(float(paddle.empty([1]).uniform_(-max_dx, max_dx))) + ty = int(float(paddle.empty([1]).uniform_(-max_dy, max_dy))) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(paddle.empty([1]).uniform_(scale_ranges[0], scale_ranges[1])) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(paddle.empty([1]).uniform_(shears[0], shears[1])) + if len(shears) == 4: + shear_y = float(paddle.empty([1]).uniform_(shears[2], shears[3])) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + fill = self.fill + channels, height, width = img.shape[1], 
img.shape[2], img.shape[3] + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + else: + fill = [float(f) for f in fill] + + img_size = [width, height] # flip for keeping BC on get_params call + + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) + + return affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(degrees={self.degrees}" + s += f", translate={self.translate}" if self.translate is not None else "" + s += f", scale={self.scale}" if self.scale is not None else "" + s += f", shear={self.shear}" if self.shear is not None else "" + s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else "" + s += f", fill={self.fill}" if self.fill != 0 else "" + s += f", center={self.center}" if self.center is not None else "" + s += ")" + + return s diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/unet.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/unet.py new file mode 100755 index 000000000..56f3ad61e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/model/unet.py @@ -0,0 +1,838 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +''' +import math +from abc import abstractmethod + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from .nn import avg_pool_nd +from .nn import checkpoint +from .nn import conv_nd +from .nn import linear +from .nn import normalization +from .nn import SiLU +from .nn import timestep_embedding +from .nn import zero_module + + +class AttentionPool2d(nn.Layer): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, + spacial_dim: int, + embed_dim: int, + num_heads_channels: int, + output_dim: int = None, + ): + super().__init__() + # self.positional_embedding = nn.Parameter( + # th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5 + # ) + positional_embedding = self.create_parameter(paddle.randn(embed_dim, spacial_dim**2 + 1) / embed_dim**0.5) + self.add_parameter("positional_embedding", positional_embedding) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + # x = x.reshape(b, c, -1) # NC(HW) + x = paddle.reshape(x, [b, c, -1]) + x = paddle.concat([x.mean(dim=-1, keepdim=True), x], axis=-1) # NC(HW+1) + x = x + paddle.cast(self.positional_embedding[None, :, :], x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Layer): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. 
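+
+    Children that subclass ``TimestepBlock`` are called as ``layer(x, emb)``;
+    all other children are called as ``layer(x)``.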
+ """ + + def forward(self, x, emb): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + else: + x = layer(x) + return x + + +class Upsample(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + emb_out = paddle.cast(emb_out, h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = paddle.chunk(emb_out, 2, axis=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x, ), self.parameters(), self.use_checkpoint) + + def _forward(self, x): + b, c, *spatial = x.shape + # x = x.reshape(b, c, -1) + x = paddle.reshape(x, [b, c, -1]) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + # return (x + h).reshape(b, c, *spatial) + return paddle.reshape(x + h, [b, c, *spatial]) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += paddle.to_tensor([matmul_ops], dtype='float64') + + +class QKVAttentionLegacy(nn.Layer): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + # q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + q, k, v = paddle.reshape(qkv, [bs * self.n_heads, ch * 3, length]).split(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Layer): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Layer): + """ + The full UNet model with attention and timestep embedding. + + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + ch = input_ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.LayerList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=int(model_channels * mult), + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(model_channels * mult) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + SiLU(), + zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)), + ) + + def forward(self, x, timesteps, y=None): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == (self.num_classes + is not None), "must specify y if and only if the model is class-conditional" + + hs = [] + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + if self.num_classes is not None: + assert y.shape == (x.shape[0], ) + emb = emb + self.label_emb(y) + + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + hs.append(h) + h = self.middle_block(h, emb) + for module in self.output_blocks: + h = paddle.concat([h, hs.pop()], axis=1) + h = module(h, emb) + # h = paddle.cast(h, x.dtype) + return self.out(h) + + +class SuperResModel(UNetModel): + """ + A UNetModel that performs super-resolution. + + Expects an extra kwarg `low_res` to condition on a low-resolution image. + """ + + def __init__(self, image_size, in_channels, *args, **kwargs): + super().__init__(image_size, in_channels * 2, *args, **kwargs) + + def forward(self, x, timesteps, low_res=None, **kwargs): + _, _, new_height, new_width = x.shape + upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear") + x = paddle.concat([x, upsampled], axis=1) + return super().forward(x, timesteps, **kwargs) + + +class EncoderUNetModel(nn.Layer): + """ + The half UNet model with attention and timestep embedding. + + For usage, see UNet. 
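The conditioning used by SuperResModel above is plain channel concatenation: the low-resolution image is bilinearly resized to the target resolution and stacked with the noisy input along the channel axis, which is why the constructor doubles in_channels. Illustrative shapes only:

import paddle
import paddle.nn.functional as F

x = paddle.randn([1, 3, 256, 256])        # noisy high-resolution input
low_res = paddle.randn([1, 3, 64, 64])    # conditioning image
upsampled = F.interpolate(low_res, (256, 256), mode="bilinear")
model_in = paddle.concat([x, upsampled], axis=1)
print(model_in.shape)                     # [1, 6, 256, 256]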
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + SiLU(), + nn.AdaptiveAvgPool2D((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + SiLU(), + AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + 
nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + # h = x.type(self.dtype) + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + # results.append(h.type(x.dtype).mean(axis=(2, 3))) + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = paddle.concat(results, axis=-1) + return self.out(h) + else: + # h = h.type(x.dtype) + h = paddle.cast(h, x.dtype) + return self.out(h) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/default.yml b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/default.yml new file mode 100755 index 000000000..3a161f169 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/default.yml @@ -0,0 +1,45 @@ +text_prompts: + - greg rutkowski和thomas kinkade在artstation上的一幅美丽的画,一个独特的灯塔,照耀着它的光穿过喧嚣的血海。 + +init_image: +width_height: [ 1280, 768] + +skip_steps: 10 +steps: 250 + +cut_ic_pow: 1 +init_scale: 1000 +clip_guidance_scale: 5000 + +tv_scale: 0 +range_scale: 150 +sat_scale: 0 +cutn_batches: 4 + +diffusion_model: 512x512_diffusion_uncond_finetune_008100 +use_secondary_model: True +diffusion_sampling_mode: ddim + +perlin_init: False +perlin_mode: mixed +seed: 445467575 +eta: 0.8 +clamp_grad: True +clamp_max: 0.05 + +randomize_class: True +clip_denoised: False +fuzzy_prompt: False +rand_mag: 0.05 + +cut_overview: "[12]*400+[4]*600" +cut_innercut: "[4]*400+[12]*600" +cut_icgray_p: "[0.2]*400+[0]*600" + +display_rate: 10 +n_batches: 1 +batch_size: 1 +batch_name: '' +clip_models: + - ViTB16 +output_dir: "./" diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/docstrings.yml b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/docstrings.yml new file mode 100755 index 000000000..702015e1c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/resources/docstrings.yml @@ -0,0 +1,103 @@ +text_prompts: | + Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. + Developing text prompts takes practice and experience, and is not the subject of this guide. 
If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. +init_image: | + Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. + If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. +width_height: | + Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + +skip_steps: | + Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps. + As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases. + The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times. + If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily. + Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems. + Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. + However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + +steps: | + When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step. + Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. 
However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. + Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + +cut_ic_pow: | + This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +init_scale: | + This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. +clip_guidance_scale: | + CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. + Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. + Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. +tv_scale: | + Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising +range_scale: | + Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + +sat_scale: | + Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. +cutn_batches: | + Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. + Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. + At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. + However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. 
Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image. + So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + +diffusion_model: Diffusion_model of choice. + +use_secondary_model: | + Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + +diffusion_sampling_mode: | + Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + +perlin_init: | + Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). + Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + +perlin_mode: | + sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. +seed: | + Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. + After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. +eta: | + eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. 
As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. + The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. +clamp_grad: | + As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. +clamp_max: | + Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + +randomize_class: +clip_denoised: False +fuzzy_prompt: | + Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. +rand_mag: | + Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + +cut_overview: The schedule of overview cuts +cut_innercut: The schedule of inner cuts +cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +display_rate: | + During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. +n_batches: | + This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. +batch_name: | + The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. +clip_models: | + CLIP Model selectors. ViT-B/32, ViT-B/16, ViT-L/14, RN101, RN50, RN50x4, RN50x16, RN50x64. + These various CLIP models are available for you to use during image generation. Models have different styles or ‘flavors,’ so look around. + You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash. + The rough order of speed/mem usage is (smallest/fastest to largest/slowest): + ViT-B/32 + RN50 + RN101 + ViT-B/16 + RN50x4 + RN50x16 + RN50x64 + ViT-L/14 + For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. 
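The cut bookkeeping described above, as a worked example. The cut_overview and cut_innercut strings are Python expressions (the runner below evaluates them with eval), yielding a 1000-entry per-timestep schedule; the number of cuts per timestep is (overview + innercut) * cutn_batches. With the defaults from default.yml:

cut_overview = eval("[12]*400+[4]*600")   # 1000 entries, one per timestep
cut_innercut = eval("[4]*400+[12]*600")
cutn_batches = 4
t_int = 1000                              # earliest denoising step, as indexed in the runner
per_batch = cut_overview[1000 - t_int] + cut_innercut[1000 - t_int]
print(per_batch, per_batch * cutn_batches)  # 16 cuts per batch, 64 cuts per timestep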
diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/runner.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/runner.py new file mode 100755 index 000000000..c3fa9e757 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/reverse_diffusion/runner.py @@ -0,0 +1,285 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/runner.py +''' +import gc +import os +import random +from threading import Thread + +import numpy as np +import paddle +import paddle.vision.transforms as T +import paddle_lpips as lpips +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.utils.utils import tokenize +from docarray import Document +from docarray import DocumentArray +from IPython import display +from ipywidgets import Output +from PIL import Image + +from .helper import logger +from .helper import parse_prompt +from .model.losses import range_loss +from .model.losses import spherical_dist_loss +from .model.losses import tv_loss +from .model.make_cutouts import MakeCutoutsDango +from .model.sec_diff import alpha_sigma_to_t +from .model.sec_diff import SecondaryDiffusionImageNet2 +from .model.transforms import Normalize + + +def do_run(args, models) -> 'DocumentArray': + logger.info('preparing models...') + model, diffusion, clip_models, secondary_model = models + normalize = Normalize( + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225], + ) + lpips_model = lpips.LPIPS(net='vgg') + for parameter in lpips_model.parameters(): + parameter.stop_gradient = True + side_x = (args.width_height[0] // 64) * 64 + side_y = (args.width_height[1] // 64) * 64 + cut_overview = eval(args.cut_overview) + cut_innercut = eval(args.cut_innercut) + cut_icgray_p = eval(args.cut_icgray_p) + + from .model.perlin_noises import create_perlin_noise, regen_perlin + + seed = args.seed + + skip_steps = args.skip_steps + + loss_values = [] + + if seed is not None: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + model_stats = [] + for clip_model in clip_models: + model_stat = { + 'clip_model': None, + 'target_embeds': [], + 'make_cutouts': None, + 'weights': [], + } + model_stat['clip_model'] = clip_model + + if isinstance(args.text_prompts, str): + args.text_prompts = [args.text_prompts] + + for prompt in args.text_prompts: + txt, weight = parse_prompt(prompt) + txt = clip_model.encode_text(tokenize(prompt)) + if args.fuzzy_prompt: + for i in range(25): + model_stat['target_embeds'].append((txt + paddle.randn(txt.shape) * args.rand_mag).clip(0, 1)) + model_stat['weights'].append(weight) + else: + model_stat['target_embeds'].append(txt) + model_stat['weights'].append(weight) + + model_stat['target_embeds'] = paddle.concat(model_stat['target_embeds']) + model_stat['weights'] = paddle.to_tensor(model_stat['weights']) + if model_stat['weights'].sum().abs() < 1e-3: + raise RuntimeError('The weights must not sum to 0.') + model_stat['weights'] /= model_stat['weights'].sum().abs() + model_stats.append(model_stat) + + init = None + if args.init_image: + d = Document(uri=args.init_image).load_uri_to_image_tensor(side_x, side_y) + init = T.to_tensor(d.tensor).unsqueeze(0) * 2 - 1 + + if args.perlin_init: + if args.perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif args.perlin_mode == 'gray': + init = 
create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + init = (T.to_tensor(init).add(T.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + + cur_t = None + + def cond_fn(x, t, y=None): + x_is_NaN = False + n = x.shape[0] + if secondary_model: + alpha = paddle.to_tensor(diffusion.sqrt_alphas_cumprod[cur_t], dtype='float32') + sigma = paddle.to_tensor(diffusion.sqrt_one_minus_alphas_cumprod[cur_t], dtype='float32') + cosine_t = alpha_sigma_to_t(alpha, sigma) + x = paddle.to_tensor(x.detach(), dtype='float32') + x.stop_gradient = False + cosine_t = paddle.tile(paddle.to_tensor(cosine_t.detach().cpu().numpy()), [n]) + cosine_t.stop_gradient = False + out = secondary_model(x, cosine_t).pred + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + else: + t = paddle.ones([n], dtype='int64') * cur_t + out = diffusion.p_mean_variance(model, x, t, clip_denoised=False, model_kwargs={'y': y}) + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out['pred_xstart'] * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + for model_stat in model_stats: + for i in range(args.cutn_batches): + t_int = (int(t.item()) + 1) # errors on last step without +1, need to find source + # when using SLIP Base model the dimensions need to be hard coded to avoid AttributeError: 'VisionTransformer' object has no attribute 'input_resolution' + try: + input_resolution = model_stat['clip_model'].visual.input_resolution + except: + input_resolution = 224 + + cuts = MakeCutoutsDango( + input_resolution, + Overview=cut_overview[1000 - t_int], + InnerCrop=cut_innercut[1000 - t_int], + IC_Size_Pow=args.cut_ic_pow, + IC_Grey_P=cut_icgray_p[1000 - t_int], + ) + clip_in = normalize(cuts(x_in.add(paddle.to_tensor(1.0)).divide(paddle.to_tensor(2.0)))) + image_embeds = (model_stat['clip_model'].encode_image(clip_in)) + + dists = spherical_dist_loss( + image_embeds.unsqueeze(1), + model_stat['target_embeds'].unsqueeze(0), + ) + + dists = dists.reshape([ + cut_overview[1000 - t_int] + cut_innercut[1000 - t_int], + n, + -1, + ]) + losses = dists.multiply(model_stat['weights']).sum(2).mean(0) + loss_values.append(losses.sum().item()) # log loss, probably shouldn't do per cutn_batch + + x_in_grad += ((paddle.grad(losses.sum() * args.clip_guidance_scale, x_in)[0]) / args.cutn_batches) + tv_losses = tv_loss(x_in) + range_losses = range_loss(x_in) + sat_losses = paddle.abs(x_in - x_in.clip(min=-1, max=1)).mean() + loss = (tv_losses.sum() * args.tv_scale + range_losses.sum() * args.range_scale + + sat_losses.sum() * args.sat_scale) + if init is not None and args.init_scale: + init_losses = lpips_model(x_in, init) + loss = loss + init_losses.sum() * args.init_scale + x_in_grad += paddle.grad(loss, x_in)[0] + if not paddle.isnan(x_in_grad).any(): + grad = -paddle.grad(x_in_d, x, x_in_grad)[0] + else: + x_is_NaN = True + grad = paddle.zeros_like(x) + if args.clamp_grad and not x_is_NaN: + magnitude = grad.square().mean().sqrt() + return (grad * 
magnitude.clip(max=args.clamp_max) / magnitude) + return grad + + if args.diffusion_sampling_mode == 'ddim': + sample_fn = diffusion.ddim_sample_loop_progressive + else: + sample_fn = diffusion.plms_sample_loop_progressive + + logger.info('creating artwork...') + + image_display = Output() + da_batches = DocumentArray() + + for _nb in range(args.n_batches): + display.clear_output(wait=True) + display.display(args.name_docarray, image_display) + gc.collect() + paddle.device.cuda.empty_cache() + + d = Document(tags=vars(args)) + da_batches.append(d) + + cur_t = diffusion.num_timesteps - skip_steps - 1 + + if args.perlin_init: + init = regen_perlin(args.perlin_mode, side_y, side_x, args.batch_size) + + if args.diffusion_sampling_mode == 'ddim': + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + eta=args.eta, + ) + else: + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + order=2, + ) + + threads = [] + for j, sample in enumerate(samples): + cur_t -= 1 + with image_display: + if j % args.display_rate == 0 or cur_t == -1: + for _, image in enumerate(sample['pred_xstart']): + image = (image + 1) / 2 + image = image.clip(0, 1).squeeze().transpose([1, 2, 0]).numpy() * 255 + image = np.uint8(image) + image = Image.fromarray(image) + + image.save(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb))) + c = Document(tags={'cur_t': cur_t}) + c.load_pil_image_to_datauri(image) + d.chunks.append(c) + display.clear_output(wait=True) + display.display(display.Image(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb)))) + d.chunks.plot_image_sprites(os.path.join(args.output_dir, + f'{args.name_docarray}-progress-{_nb}.png'), + show_index=True) + t = Thread( + target=_silent_push, + args=( + da_batches, + args.name_docarray, + ), + ) + threads.append(t) + t.start() + + if cur_t == -1: + d.load_pil_image_to_datauri(image) + + for t in threads: + t.join() + display.clear_output(wait=True) + logger.info(f'done! {args.name_docarray}') + da_batches.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + return da_batches + + +def _silent_push(da_batches: DocumentArray, name: str) -> None: + try: + da_batches.push(name) + except Exception as ex: + logger.debug(f'push failed: {ex}') diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/__init__.py new file mode 100755 index 000000000..5c75b1c83 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = '2.0.0' # Maybe dev is better + +from . import transformers +from . import utils diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/beam.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/beam.py new file mode 100755 index 000000000..d316ec9bb --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/beam.py @@ -0,0 +1,1602 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +import sys +import warnings +from functools import partial +from functools import reduce + +import paddle +from paddle.fluid import core +from paddle.fluid.data_feeder import check_dtype +from paddle.fluid.data_feeder import check_type +from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.fluid.data_feeder import convert_dtype +from paddle.fluid.framework import default_main_program +from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layers import control_flow +from paddle.fluid.layers import nn +from paddle.fluid.layers import sequence_lod +from paddle.fluid.layers import tensor +from paddle.fluid.layers import utils +from paddle.fluid.layers.utils import * +from paddle.fluid.param_attr import ParamAttr +from paddle.utils import deprecated +#import paddle.nn as nn + + +class ArrayWrapper(object): + + def __init__(self, x): + self.array = [x] + + def append(self, x): + self.array.append(x) + return self + + def __getitem__(self, item): + return self.array.__getitem__(item) + + +def _maybe_copy(state, new_state, step_mask): + """update rnn state or just pass the old state through""" + new_state = nn.elementwise_mul(new_state, step_mask, axis=0) \ + + nn.elementwise_mul(state, (1 - step_mask), axis=0) + return new_state + + +def _transpose_batch_time(x): + perm = [1, 0] + list(range(2, len(x.shape))) + return nn.transpose(x, perm) + + +class Decoder(object): + """ + :api_attr: Static Graph + + Decoder is the base class for any decoder instance used in `dynamic_decode`. + It provides interface for output generation for one time step, which can be + used to generate sequences. + + The key abstraction provided by Decoder is: + + 1. :code:`(initial_input, initial_state, finished) = initialize(inits)` , + which generates the input and state for the first decoding step, and gives the + initial status telling whether each sequence in the batch is finished. 
+ It would be called once before the decoding iterations. + + 2. :code:`(output, next_state, next_input, finished) = step(time, input, state)` , + which transforms the input and state to the output and new state, generates + input for the next decoding step, and emits the flag indicating finished status. + It is the main part for each decoding iteration. + + 3. :code:`(final_outputs, final_state) = finalize(outputs, final_state, sequence_lengths)` , + which revises the outputs(stack of all time steps' output) and final state(state from the + last decoding step) to get the counterpart for special usage. + Not necessary to be implemented if no need to revise the stacked outputs and + state from the last decoding step. If implemented, it would be called after + the decoding iterations. + + Decoder is more general compared to RNNCell, since the returned `next_input` + and `finished` make it can determine the input and when to finish by itself + when used in dynamic decoding. Decoder always wraps a RNNCell instance though + not necessary. + """ + + def initialize(self, inits): + r""" + Called once before the decoding iterations. + + Parameters: + inits: Argument provided by the caller. + + Returns: + tuple: A tuple( :code:`(initial_inputs, initial_states, finished)` ). \ + `initial_inputs` and `initial_states` both are a (possibly nested \ + structure of) tensor variable[s], and `finished` is a tensor with \ + bool data type. + """ + raise NotImplementedError + + def step(self, time, inputs, states, **kwargs): + r""" + Called per step of decoding. + + Parameters: + time(Variable): A Tensor with shape :math:`[1]` provided by the caller. + The data type is int64. + inputs(Variable): A (possibly nested structure of) tensor variable[s]. + states(Variable): A (possibly nested structure of) tensor variable[s]. + **kwargs: Additional keyword arguments, provided by the caller. + + Returns: + tuple: A tuple( :code:(outputs, next_states, next_inputs, finished)` ). \ + `next_inputs` and `next_states` both are a (possibly nested \ + structure of) tensor variable[s], and the structure, shape and \ + data type must be same as the counterpart from input arguments. \ + `outputs` is a (possibly nested structure of) tensor variable[s]. \ + `finished` is a Tensor with bool data type. + """ + raise NotImplementedError + + def finalize(self, outputs, final_states, sequence_lengths): + r""" + Called once after the decoding iterations if implemented. + + Parameters: + outputs(Variable): A (possibly nested structure of) tensor variable[s]. + The structure and data type is same as `output_dtype`. + The tensor stacks all time steps' output thus has shape + :math:`[time\_step, batch\_size, ...]` , which is done by the caller. + final_states(Variable): A (possibly nested structure of) tensor variable[s]. + It is the `next_states` returned by `decoder.step` at last decoding step, + thus has the same structure, shape and data type with states at any time + step. + + Returns: + tuple: A tuple( :code:`(final_outputs, final_states)` ). \ + `final_outputs` and `final_states` both are a (possibly nested \ + structure of) tensor variable[s]. + """ + raise NotImplementedError + + @property + def tracks_own_finished(self): + """ + Describes whether the Decoder keeps track of finished states by itself. + + `decoder.step()` would emit a bool `finished` value at each decoding + step. 
The emited `finished` can be used to determine whether every + batch entries is finished directly, or it can be combined with the + finished tracker keeped in `dynamic_decode` by performing a logical OR + to take the already finished into account. + + If `False`, the latter would be took when performing `dynamic_decode`, + which is the default. Otherwise, the former would be took, which uses + the finished value emited by the decoder as all batch entry finished + status directly, and it is the case when batch entries might be + reordered such as beams in BeamSearchDecoder. + + Returns: + bool: A python bool `False`. + """ + return False + + +class BeamSearchDecoder(Decoder): + """ + Decoder with beam search decoding strategy. It wraps a cell to get probabilities, + and follows a beam search step to calculate scores and select candidate + token ids for each decoding step. + + Please refer to `Beam search `_ + for more details. + + **NOTE** When decoding with beam search, the `inputs` and `states` of cell + would be tiled to `beam_size` (unsqueeze and tile), resulting to shapes like + `[batch_size * beam_size, ...]` , which is built into `BeamSearchDecoder` and + done automatically. Thus any other tensor with shape `[batch_size, ...]` used + in `cell.call` needs to be tiled manually first, which can be completed by using + :code:`BeamSearchDecoder.tile_beam_merge_with_batch` . The most common case + for this is the encoder output in attention mechanism. + + Returns: + BeamSearchDecoder: An instance of decoder which can be used in \ + `paddle.nn.dynamic_decode` to implement decoding. + + Examples: + + .. code-block:: python + + import numpy as np + import paddle + from paddle.nn import BeamSearchDecoder, dynamic_decode + from paddle.nn import GRUCell, Linear, Embedding + trg_embeder = Embedding(100, 32) + output_layer = Linear(32, 32) + decoder_cell = GRUCell(input_size=32, hidden_size=32) + decoder = BeamSearchDecoder(decoder_cell, + start_token=0, + end_token=1, + beam_size=4, + embedding_fn=trg_embeder, + output_fn=output_layer) + + """ + + def __init__(self, cell, start_token, end_token, beam_size, embedding_fn=None, output_fn=None): + """ + Constructor of BeamSearchDecoder. + + Parameters: + cell(RNNCellBase): An instance of `RNNCellBase` or object with the same interface. + start_token(int): The start token id. + end_token(int): The end token id. + beam_size(int): The beam width used in beam search. + embedding_fn(optional): A callable to apply to selected candidate ids. + Mostly it is an embedding layer to transform ids to embeddings, + and the returned value acts as the `input` argument for `cell.call`. + If not provided, the id to embedding transformation must be built into + `cell.call`. Default None. + output_fn(optional): A callable to apply to the cell's output prior to + calculate scores and select candidate token ids. Default None. + """ + self.cell = cell + self.embedding_fn = embedding_fn + self.output_fn = output_fn + self.start_token = start_token + self.end_token = end_token + self.beam_size = beam_size + + @staticmethod + def tile_beam_merge_with_batch(x, beam_size): + r""" + Tile the batch dimension of a tensor. Specifically, this function takes + a tensor t shaped `[batch_size, s0, s1, ...]` composed of minibatch + entries `t[0], ..., t[batch_size - 1]` and tiles it to have a shape + `[batch_size * beam_size, s0, s1, ...]` composed of minibatch entries + `t[0], t[0], ..., t[1], t[1], ...` where each minibatch entry is repeated + `beam_size` times. 
+ + Parameters: + x(Variable): A tensor with shape `[batch_size, ...]`. The data type + should be float32, float64, int32, int64 or bool. + beam_size(int): The beam width used in beam search. + + Returns: + Variable: A tensor with shape `[batch_size * beam_size, ...]`, whose \ + data type is same as `x`. + """ + check_type(x, 'x', (Variable), 'BeamSearchDecoder.tile_beam_merge_with_batch') + x = nn.unsqueeze(x, [1]) # [batch_size, 1, ...] + expand_times = [1] * len(x.shape) + expand_times[1] = beam_size + x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...] + x = nn.transpose(x, list(range(2, len(x.shape))) + [0, 1]) # [..., batch_size, beam_size] + # use 0 to copy to avoid wrong shape + x = nn.reshape(x, shape=[0] * (len(x.shape) - 2) + [-1]) # [..., batch_size * beam_size] + x = nn.transpose(x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1))) # [batch_size * beam_size, ...] + return x + + def _split_batch_beams(self, x): + r""" + Reshape a tensor with shape `[batch_size * beam_size, ...]` to a new + tensor with shape `[batch_size, beam_size, ...]`. + + Parameters: + x(Variable): A tensor with shape `[batch_size * beam_size, ...]`. The + data type should be float32, float64, int32, int64 or bool. + + Returns: + Variable: A tensor with shape `[batch_size, beam_size, ...]`, whose \ + data type is same as `x`. + """ + check_type(x, 'x', (Variable), 'BeamSearchDecoder._split_batch_beams') + # TODO: avoid fake shape in compile-time like tile_beam_merge_with_batch + return nn.reshape(x, shape=[-1, self.beam_size] + list(x.shape[1:])) + + def _merge_batch_beams(self, x): + r""" + Reshape a tensor with shape `[batch_size, beam_size, ...]` to a new + tensor with shape `[batch_size * beam_size, ...]`. + + Parameters: + x(Variable): A tensor with shape `[batch_size, beam_size, ...]`. The + data type should be float32, float64, int32, int64 or bool. + + Returns: + Variable: A tensor with shape `[batch_size * beam_size, ...]`, whose \ + data type is same as `x`. + """ + check_type(x, 'x', (Variable), 'BeamSearchDecoder._merge_batch_beams') + # TODO: avoid fake shape in compile-time like tile_beam_merge_with_batch + return nn.reshape(x, shape=[-1] + list(x.shape[2:])) + + def _expand_to_beam_size(self, x): + r""" + This function takes a tensor t shaped `[batch_size, s0, s1, ...]` composed + of minibatch entries `t[0], ..., t[batch_size - 1]` and tiles it to have a + shape `[batch_size, beam_size, s0, s1, ...]` composed of minibatch entries + `t[0], t[0], ..., t[1], t[1], ...` where each minibatch entry is repeated + `beam_size` times. + + Parameters: + x(Variable): A tensor with shape `[batch_size, ...]`, The data type + should be float32, float64, int32, int64 or bool. + + Returns: + Variable: A tensor with shape `[batch_size, beam_size, ...]`, whose \ + data type is same as `x`. + """ + check_type(x, 'x', (Variable), 'BeamSearchDecoder._expand_to_beam_size') + x = nn.unsqueeze(x, [1]) + expand_times = [1] * len(x.shape) + expand_times[1] = self.beam_size + x = paddle.tile(x, expand_times) + return x + + def _mask_probs(self, probs, finished): + r""" + Mask log probabilities. It forces finished beams to allocate all probability + mass to eos and unfinished beams to remain unchanged. + + Parameters: + probs(Variable): A tensor with shape `[batch_size, beam_size, vocab_size]`, + representing the log probabilities. Its data type should be float32 or float64. + finished(Variable): A tensor with shape `[batch_size, beam_size]`, + representing the finished status for all beams. 
Its data type + should be bool. + + Returns: + Variable: A tensor with the same shape and data type as `x`, \ + where unfinished beams stay unchanged and finished beams are \ + replaced with a tensor with all probability on the EOS token. + """ + check_type(probs, 'probs', (Variable), 'BeamSearchDecoder._mask_probs') + check_type(finished, 'finished', (Variable), 'BeamSearchDecoder._mask_probs') + # TODO: use where_op + finished = tensor.cast(finished, dtype=probs.dtype) + probs = nn.elementwise_mul(paddle.tile(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]), + self.noend_mask_tensor, + axis=-1) - nn.elementwise_mul(probs, (finished - 1), axis=0) + return probs + + def _gather(self, x, indices, batch_size): + r""" + Gather from the tensor `x` using `indices`. + + Parameters: + x(Variable): A tensor with shape `[batch_size, beam_size, ...]`. + indices(Variable): A `int64` tensor with shape `[batch_size, beam_size]`, + representing the indices that we use to gather. + batch_size(Variable): A tensor with shape `[1]`. Its data type should + be int32 or int64. + + Returns: + Variable: A tensor with the same shape and data type as `x`, \ + representing the gathered tensor. + """ + check_type(x, 'x', (Variable), 'BeamSearchDecoder._gather') + check_type(indices, 'indices', (Variable), 'BeamSearchDecoder._gather') + check_type(batch_size, 'batch_size', (Variable), 'BeamSearchDecoder._gather') + # TODO: compatibility of int32 and int64 + batch_size = tensor.cast(batch_size, indices.dtype) if batch_size.dtype != indices.dtype else batch_size + batch_size.stop_gradient = True # TODO: remove this + batch_pos = paddle.tile(nn.unsqueeze(tensor.range(0, batch_size, 1, dtype=indices.dtype), [1]), + [1, self.beam_size]) + topk_coordinates = nn.stack([batch_pos, indices], axis=2) + topk_coordinates.stop_gradient = True + return nn.gather_nd(x, topk_coordinates) + + class OutputWrapper(collections.namedtuple("OutputWrapper", ("scores", "predicted_ids", "parent_ids"))): + """ + The structure for the returned value `outputs` of `decoder.step`. + A namedtuple includes scores, predicted_ids, parent_ids as fields. + """ + pass + + class StateWrapper(collections.namedtuple("StateWrapper", ("cell_states", "log_probs", "finished", "lengths"))): + """ + The structure for the argument `states` of `decoder.step`. + A namedtuple includes cell_states, log_probs, finished, lengths as fields. + """ + pass + + def initialize(self, initial_cell_states, bos_ids=None): + r""" + Initialize the BeamSearchDecoder. + + Parameters: + initial_cell_states(Variable): A (possibly nested structure of) + tensor variable[s]. An argument provided by the caller. + + Returns: + tuple: A tuple( :code:`(initial_inputs, initial_states, finished)` ). \ + `initial_inputs` is a tensor t filled by `start_token` with shape \ + `[batch_size, beam_size]` when `embedding_fn` is None, or the \ + returned value of `embedding_fn(t)` when `embedding_fn` is provided. \ + `initial_states` is a nested structure(namedtuple including cell_states, \ + log_probs, finished, lengths as fields) of tensor variables, where \ + `log_probs, finished, lengths` all has a tensor value shaped \ + `[batch_size, beam_size]` with data type `float32, bool, int64`. \ + cell_states has a value with the same structure as the input \ + argument `initial_cell_states` but with tiled shape `[batch_size, beam_size, ...]`. \ + `finished` is a `bool` tensor filled by False with shape `[batch_size, beam_size]`. 
+ """ + self.kinf = 1e9 + state = flatten(initial_cell_states)[0] + self.batch_size = nn.shape(state)[0] + + if bos_ids is not None: + self.start_token = bos_ids + + self.start_token_tensor = tensor.fill_constant(shape=[1], dtype="int64", value=self.start_token) + self.end_token_tensor = tensor.fill_constant(shape=[1], dtype="int64", value=self.end_token) + + init_cell_states = map_structure(self._expand_to_beam_size, initial_cell_states) + init_inputs = paddle.full(shape=[self.batch_size, self.beam_size], + fill_value=self.start_token_tensor, + dtype=self.start_token_tensor.dtype) + log_probs = paddle.tile(tensor.assign(np.array([[0.] + [-self.kinf] * (self.beam_size - 1)], dtype="float32")), + [self.batch_size, 1]) + if paddle.get_default_dtype() == "float64": + log_probs = tensor.cast(log_probs, "float64") + # TODO: remove the restriction of force_cpu + init_finished = tensor.fill_constant_batch_size_like(input=state, + shape=[-1, self.beam_size], + dtype="bool", + value=False, + force_cpu=True) + init_lengths = tensor.zeros_like(init_inputs) + init_inputs = self.embedding_fn(init_inputs) if self.embedding_fn else init_inputs + return init_inputs, self.StateWrapper(init_cell_states, log_probs, init_finished, init_lengths), init_finished + + def _beam_search_step(self, time, logits, next_cell_states, beam_state): + r""" + Calculate scores and select candidate token ids. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the caller, + representing the current time step number of decoding. + logits(Variable): A tensor with shape `[batch_size, beam_size, vocab_size]`, + representing the logits at the current time step. Its data type is float32. + next_cell_states(Variable): A (possibly nested structure of) tensor variable[s]. + It has the same structure, shape and data type as the `cell_states` of + `initial_states` returned by `initialize()`. It represents the next state + from the cell. + beam_state(Variable): A structure of tensor variables. + It is same as the `initial_states` returned by `initialize()` for + the first decoding step and `beam_search_state` returned by + `step()` for the others. + + Returns: + tuple: A tuple( :code:`(beam_search_output, beam_search_state)` ). \ + `beam_search_output` is a namedtuple(including scores, predicted_ids, \ + parent_ids as fields) of tensor variables, where \ + `scores, predicted_ids, parent_ids` all has a tensor value shaped \ + `[batch_size, beam_size]` with data type `float32, int64, int64`. + `beam_search_state` has the same structure, shape and data type \ + as the input argument `beam_state`. 
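+
+ The top-k selection is performed over the scores flattened to
+ `[batch_size, beam_size * vocab_size]`; each selected index is then split
+ into a beam id (`index // vocab_size`), which becomes `parent_ids`, and a
+ token id (`index % vocab_size`), which becomes `predicted_ids`. For
+ example, with `vocab_size = 1000`, a flattened index of `2005` denotes
+ token `5` of beam `2`.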
+ + """ + self.vocab_size = logits.shape[-1] + self.vocab_size_tensor = tensor.fill_constant(shape=[1], dtype="int64", value=self.vocab_size) + noend_array = [-self.kinf] * self.vocab_size + noend_array[self.end_token] = 0 + + self.noend_mask_tensor = tensor.assign(np.array(noend_array, "float32")) + if paddle.get_default_dtype() == "float64": + self.noend_mask_tensor = tensor.cast(self.noend_mask_tensor, "float64") + + step_log_probs = nn.log(nn.softmax(logits)) + step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) + log_probs = nn.elementwise_add(x=step_log_probs, y=beam_state.log_probs, axis=0) + # TODO: length penalty + scores = log_probs + scores = nn.reshape(scores, [-1, self.beam_size * self.vocab_size]) + # TODO: add grad for topk then this beam search can be used to train + topk_scores, topk_indices = paddle.topk(x=scores, k=self.beam_size) + beam_indices = nn.elementwise_floordiv(topk_indices, self.vocab_size_tensor) + token_indices = nn.elementwise_mod(topk_indices, self.vocab_size_tensor) + next_log_probs = self._gather(nn.reshape(log_probs, [-1, self.beam_size * self.vocab_size]), topk_indices, + self.batch_size) + next_cell_states = map_structure(lambda x: self._gather(x, beam_indices, self.batch_size), next_cell_states) + next_finished = self._gather(beam_state.finished, beam_indices, self.batch_size) + next_lengths = self._gather(beam_state.lengths, beam_indices, self.batch_size) + next_lengths = next_lengths + tensor.cast(nn.logical_not(next_finished), beam_state.lengths.dtype) + next_finished = control_flow.logical_or(next_finished, control_flow.equal(token_indices, self.end_token_tensor)) + + beam_search_output = self.OutputWrapper(topk_scores, token_indices, beam_indices) + beam_search_state = self.StateWrapper(next_cell_states, next_log_probs, next_finished, next_lengths) + return beam_search_output, beam_search_state + + def step(self, time, inputs, states, **kwargs): + r""" + Perform a beam search decoding step, which uses `cell` to get probabilities, + and follows a beam search step to calculate scores and select candidate + token ids. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the caller, + representing the current time step number of decoding. + inputs(Variable): A tensor variable. It is same as `initial_inputs` + returned by `initialize()` for the first decoding step and + `next_inputs` returned by `step()` for the others. + states(Variable): A structure of tensor variables. + It is same as the `initial_states` returned by `initialize()` for + the first decoding step and `beam_search_state` returned by + `step()` for the others. + **kwargs: Additional keyword arguments, provided by the caller. + + Returns: + tuple: A tuple( :code:`(beam_search_output, beam_search_state, next_inputs, finished)` ). \ + `beam_search_state` and `next_inputs` have the same structure, \ + shape and data type as the input arguments `states` and `inputs` separately. \ + `beam_search_output` is a namedtuple(including scores, predicted_ids, \ + parent_ids as fields) of tensor variables, where \ + `scores, predicted_ids, parent_ids` all has a tensor value shaped \ + `[batch_size, beam_size]` with data type `float32, int64, int64`. \ + `finished` is a `bool` tensor with shape `[batch_size, beam_size]`. 
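+
+ Internally, `inputs` and the cell states are merged from
+ `[batch_size, beam_size, ...]` to `[batch_size * beam_size, ...]` before
+ calling the wrapped cell, and the cell outputs and new cell states are
+ split back to `[batch_size, beam_size, ...]` before scoring.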
+ """ + inputs = map_structure(self._merge_batch_beams, inputs) + cell_states = map_structure(self._merge_batch_beams, states.cell_states) + cell_outputs, next_cell_states = self.cell(inputs, cell_states, **kwargs) + cell_outputs = map_structure(self._split_batch_beams, cell_outputs) + next_cell_states = map_structure(self._split_batch_beams, next_cell_states) + + if self.output_fn is not None: + cell_outputs = self.output_fn(cell_outputs) + + beam_search_output, beam_search_state = self._beam_search_step(time=time, + logits=cell_outputs, + next_cell_states=next_cell_states, + beam_state=states) + finished = beam_search_state.finished + sample_ids = beam_search_output.predicted_ids + sample_ids.stop_gradient = True + next_inputs = self.embedding_fn(sample_ids) if self.embedding_fn else sample_ids + + return (beam_search_output, beam_search_state, next_inputs, finished) + + def finalize(self, outputs, final_states, sequence_lengths): + r""" + Use `gather_tree` to backtrace along the beam search tree and construct + the full predicted sequences. + + Parameters: + outputs(Variable): A structure(namedtuple) of tensor variables, + The structure and data type is same as `output_dtype`. + The tensor stacks all time steps' output thus has shape + `[time_step, batch_size, ...]`, which is done by the caller. + final_states(Variable): A structure(namedtuple) of tensor variables. + It is the `next_states` returned by `decoder.step` at last + decoding step, thus has the same structure, shape and data type + with states at any time step. + sequence_lengths(Variable): An `int64` tensor shaped `[batch_size, beam_size]`. + It contains sequence lengths for each beam determined during + decoding. + + Returns: + tuple: A tuple( :code:`(predicted_ids, final_states)` ). \ + `predicted_ids` is an `int64` tensor shaped \ + `[time_step, batch_size, beam_size]`. `final_states` is the same \ + as the input argument `final_states`. + """ + predicted_ids = nn.gather_tree(outputs.predicted_ids, outputs.parent_ids) + # TODO: use FinalBeamSearchDecoderOutput as output + return predicted_ids, final_states + + @property + def tracks_own_finished(self): + """ + BeamSearchDecoder reorders its beams and their finished state. Thus it + conflicts with `dynamic_decode` function's tracking of finished states. + Setting this property to true to avoid early stopping of decoding due + to mismanagement of the finished state. + + Returns: + bool: A python bool `True`. + """ + return True + + +def _dynamic_decode_imperative(decoder, + inits=None, + max_step_num=None, + output_time_major=False, + impute_finished=False, + is_test=False, + return_length=False, + bos_ids=None, + **kwargs): + + def _maybe_copy(state, new_state, step_mask): + # TODO: use where_op + state_dtype = state.dtype + if convert_dtype(state_dtype) in ["bool"]: + state = tensor.cast(state, dtype="float32") + new_state = tensor.cast(new_state, dtype="float32") + if step_mask.dtype != state.dtype: + step_mask = tensor.cast(step_mask, dtype=state.dtype) + # otherwise, renamed bool gradients of would be summed up leading + # to sum(bool) error. 
+ step_mask.stop_gradient = True + new_state = nn.elementwise_mul(state, step_mask, axis=0) - nn.elementwise_mul(new_state, (step_mask - 1), + axis=0) + if convert_dtype(state_dtype) in ["bool"]: + new_state = tensor.cast(new_state, dtype=state_dtype) + return new_state + + initial_inputs, initial_states, initial_finished = decoder.initialize(inits, bos_ids=bos_ids) + inputs, states, finished = (initial_inputs, initial_states, initial_finished) + cond = control_flow.logical_not((nn.reduce_all(initial_finished))) + sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64") + outputs = None + + step_idx = 0 + step_idx_tensor = tensor.fill_constant(shape=[1], dtype="int64", value=step_idx) + while cond.numpy(): + (step_outputs, next_states, next_inputs, next_finished) = decoder.step(step_idx_tensor, inputs, states, + **kwargs) + if not decoder.tracks_own_finished: + # BeamSearchDecoder would track it own finished, since + # beams would be reordered and the finished status of each + # entry might change. Otherwise, perform logical OR which + # would not change the already finished. + next_finished = control_flow.logical_or(next_finished, finished) + # To confirm states.finished/finished be consistent with + # next_finished. + tensor.assign(next_finished, finished) + next_sequence_lengths = nn.elementwise_add( + sequence_lengths, tensor.cast(control_flow.logical_not(finished), sequence_lengths.dtype)) + if impute_finished: # rectify the states for the finished. + next_states = map_structure(lambda x, y: _maybe_copy(x, y, finished), states, next_states) + else: + warnings.warn( + "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros." + ) if not hasattr(next_states, "lengths") else None + next_sequence_lengths = getattr(next_states, "lengths", sequence_lengths) + + outputs = map_structure(lambda x: ArrayWrapper(x), step_outputs) if step_idx == 0 else map_structure( + lambda x, x_array: x_array.append(x), step_outputs, outputs) + inputs, states, finished, sequence_lengths = (next_inputs, next_states, next_finished, next_sequence_lengths) + + control_flow.increment(x=step_idx_tensor, value=1.0, in_place=True) + step_idx += 1 + + cond = control_flow.logical_not(nn.reduce_all(finished)) + if max_step_num is not None and step_idx > max_step_num: + break + + final_outputs = map_structure(lambda x: nn.stack(x.array, axis=0), outputs) + final_states = states + + try: + final_outputs, final_states = decoder.finalize(final_outputs, final_states, sequence_lengths) + except NotImplementedError: + pass + + if not output_time_major: + final_outputs = map_structure(lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))), final_outputs) + + return (final_outputs, final_states, sequence_lengths) if return_length else (final_outputs, final_states) + + +def _dynamic_decode_declarative(decoder, + inits=None, + max_step_num=None, + output_time_major=False, + impute_finished=False, + is_test=False, + return_length=False, + **kwargs): + initial_inputs, initial_states, initial_finished = decoder.initialize(inits) + global_inputs, global_states, global_finished = (initial_inputs, initial_states, initial_finished) + global_finished.stop_gradient = True + step_idx = tensor.fill_constant(shape=[1], dtype="int64", value=0) + + cond = control_flow.logical_not((nn.reduce_all(initial_finished))) + if max_step_num is not None: + max_step_num = tensor.fill_constant(shape=[1], dtype="int64", value=max_step_num) + while_op = control_flow.While(cond, 
is_test=is_test) + + sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64") + sequence_lengths.stop_gradient = True + + if is_test: + # for test, reuse inputs and states variables to save memory + inputs = map_structure(lambda x: x, initial_inputs) + states = map_structure(lambda x: x, initial_states) + else: + # inputs and states of all steps must be saved for backward and training + inputs_arrays = map_structure(lambda x: control_flow.array_write(x, step_idx), initial_inputs) + states_arrays = map_structure(lambda x: control_flow.array_write(x, step_idx), initial_states) + + def _maybe_copy(state, new_state, step_mask): + # TODO: use where_op + state_dtype = state.dtype + if convert_dtype(state_dtype) in ["bool"]: + state = tensor.cast(state, dtype="float32") + new_state = tensor.cast(new_state, dtype="float32") + if step_mask.dtype != state.dtype: + step_mask = tensor.cast(step_mask, dtype=state.dtype) + # otherwise, renamed bool gradients of would be summed up leading + # to sum(bool) error. + step_mask.stop_gradient = True + new_state = nn.elementwise_mul(state, step_mask, axis=0) - nn.elementwise_mul(new_state, (step_mask - 1), + axis=0) + if convert_dtype(state_dtype) in ["bool"]: + new_state = tensor.cast(new_state, dtype=state_dtype) + return new_state + + def _transpose_batch_time(x): + return nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + + def _create_array_out_of_while(dtype): + current_block_idx = default_main_program().current_block_idx + default_main_program().current_block_idx = default_main_program().current_block().parent_idx + tensor_array = control_flow.create_array(dtype) + default_main_program().current_block_idx = current_block_idx + return tensor_array + + # While + with while_op.block(): + if not is_test: + inputs = map_structure(lambda array: control_flow.array_read(array, step_idx), inputs_arrays) + states = map_structure(lambda array: control_flow.array_read(array, step_idx), states_arrays) + (outputs, next_states, next_inputs, next_finished) = decoder.step(step_idx, inputs, states, **kwargs) + if not decoder.tracks_own_finished: + # BeamSearchDecoder would track it own finished, since beams would + # be reordered and the finished status of each entry might change. + # Otherwise, perform logical OR which would not change the already + # finished. + next_finished = control_flow.logical_or(next_finished, global_finished) + next_sequence_lengths = nn.elementwise_add( + sequence_lengths, tensor.cast(control_flow.logical_not(global_finished), sequence_lengths.dtype)) + if impute_finished: # rectify the states for the finished. + next_states = map_structure( + lambda x, y: _maybe_copy(x, y, global_finished), + states, + next_states, + ) + else: + warnings.warn( + "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros." 
+ ) if not hasattr(next_states, "lengths") else None + next_sequence_lengths = getattr(next_states, "lengths", sequence_lengths) + + # create tensor array in global block after dtype[s] of outputs can be got + outputs_arrays = map_structure(lambda x: _create_array_out_of_while(x.dtype), outputs) + + map_structure(lambda x, x_array: control_flow.array_write(x, i=step_idx, array=x_array), outputs, + outputs_arrays) + control_flow.increment(x=step_idx, value=1.0, in_place=True) + # update the global_finished first, since it might be also in states of + # decoder, which otherwise would write a stale finished status to array + tensor.assign(next_finished, global_finished) + tensor.assign(next_sequence_lengths, sequence_lengths) + if is_test: + map_structure(tensor.assign, next_inputs, global_inputs) + map_structure(tensor.assign, next_states, global_states) + else: + map_structure(lambda x, x_array: control_flow.array_write(x, i=step_idx, array=x_array), next_inputs, + inputs_arrays) + map_structure(lambda x, x_array: control_flow.array_write(x, i=step_idx, array=x_array), next_states, + states_arrays) + if max_step_num is not None: + control_flow.logical_and(control_flow.logical_not(nn.reduce_all(global_finished)), + control_flow.less_equal(step_idx, max_step_num), cond) + else: + control_flow.logical_not(nn.reduce_all(global_finished), cond) + + final_outputs = map_structure(lambda array: tensor.tensor_array_to_tensor(array, axis=0, use_stack=True)[0], + outputs_arrays) + if is_test: + final_states = global_states + else: + final_states = map_structure(lambda array: control_flow.array_read(array, step_idx), states_arrays) + + try: + final_outputs, final_states = decoder.finalize(final_outputs, final_states, sequence_lengths) + except NotImplementedError: + pass + + if not output_time_major: + final_outputs = map_structure(_transpose_batch_time, final_outputs) + + return (final_outputs, final_states, sequence_lengths) if return_length else (final_outputs, final_states) + + +def dynamic_decode(decoder, + inits=None, + max_step_num=None, + output_time_major=False, + impute_finished=False, + is_test=False, + return_length=False, + bos_ids=None, + **kwargs): + r""" + Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned + Tensor indicating finished status contains all True values or the number of + decoding step reaches to :attr:`max_step_num`. + + :code:`decoder.initialize()` would be called once before the decoding loop. + If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()` + would be called once after the decoding loop. + + Parameters: + decoder(Decoder): An instance of `Decoder`. + inits(object, optional): Argument passed to `decoder.initialize`. + Default `None`. + max_step_num(int, optional): The maximum number of steps. If not provided, + decode until the decoder is fully done, or in other words, the returned + Tensor by :code:`decoder.step()` indicating finished status contains + all True. Default `None`. + output_time_major(bool, optional): Indicate the data layout of Tensor included + in the final outputs(the first returned value of this method). If + attr:`False`, the data layout would be batch major with shape + `[batch_size, seq_len, ...]`. If attr:`True`, the data layout would + be time major with shape `[seq_len, batch_size, ...]`. Default: `False`. 
+ impute_finished(bool, optional): If `True` and `decoder.tracks_own_finished`
+ is False, then states get copied through for batch entries which are
+ marked as finished, while unfinished entries use the new states
+ returned by :code:`decoder.step()`; this ensures that the final states have
+ the correct values. Otherwise, states are not copied through for
+ finished entries. If the returned `final_states` is needed, it should be set
+ to True, which causes some slowdown. Default `False`.
+ is_test(bool, optional): A flag indicating whether to use test mode. In
+ test mode, less memory is used. Default `False`.
+ return_length(bool, optional): A flag indicating whether to return an
+ extra Tensor variable in the output tuple, which stores the actual
+ lengths of all decoded sequences. Default `False`.
+ bos_ids(int, optional): If provided, it is passed to `decoder.initialize`
+ and overrides the decoder's start token id. It only takes effect in
+ dynamic graph mode. Default `None`.
+ **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.
+
+ Returns:
+ tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \
+ when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \
+ The final outputs and states, both of which are a Tensor or a nested structure of Tensors. \
+ `final_outputs` has the same structure and data types as the :code:`outputs` \
+ returned by :code:`decoder.step()` , and each Tensor in `final_outputs` \
+ is stacked from the outputs of all decoding steps, which might be revised \
+ by :code:`decoder.finalize()` if the decoder has implemented `finalize`. \
+ `final_states` is the counterpart at the last time step of the initial states \
+ returned by :code:`decoder.initialize()` , thus it has the same structure \
+ and contains tensors with the same shapes and data types. `sequence_lengths` \
+ is an `int64` tensor with the same shape as `finished` returned \
+ by :code:`decoder.initialize()` , and it stores the actual lengths of \
+ all decoded sequences.
+
+
+ Examples:
+
+ .. code-block:: python
+
+ import numpy as np
+ import paddle
+ from paddle.nn import BeamSearchDecoder, dynamic_decode
+ from paddle.nn import GRUCell, Linear, Embedding
+ trg_embeder = Embedding(100, 32)
+ output_layer = Linear(32, 32)
+ decoder_cell = GRUCell(input_size=32, hidden_size=32)
+ decoder = BeamSearchDecoder(decoder_cell,
+ start_token=0,
+ end_token=1,
+ beam_size=4,
+ embedding_fn=trg_embeder,
+ output_fn=output_layer)
+ encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
+ outputs = dynamic_decode(decoder=decoder,
+ inits=decoder_cell.get_initial_states(encoder_output),
+ max_step_num=10)
+ """
+ if in_dygraph_mode():
+ return _dynamic_decode_imperative(decoder, inits, max_step_num, output_time_major, impute_finished, is_test,
+ return_length, bos_ids, **kwargs)
+ else:
+ # Static graph (declarative) mode is not supported by this module yet.
+ raise NotImplementedError("dynamic_decode in this module only supports dynamic graph mode.")
+
+ #return _dynamic_decode_declarative(decoder, inits, max_step_num,
+ # output_time_major, impute_finished,
+ # is_test, return_length, **kwargs)
+
+
+class DecodeHelper(object):
+ """
+ DecodeHelper is the base class for any helper instance used in `BasicDecoder`.
+ It provides the interface for implementing sampling and producing inputs for
+ the next time step in dynamic decoding.
+ """
+
+ def initialize(self):
+ r"""
+ DecodeHelper initialization to produce inputs for the first decoding step
+ and give the initial status telling whether each sequence in the batch
+ is finished. It is part of the initialization of `BasicDecoder`.
+
+ Returns:
+ tuple: A tuple( :code:`(initial_inputs, initial_finished)` ).
\ + `initial_inputs` is a (possibly nested structure of) tensor \ + variable[s], and the tensor's shape is `[batch_size, ...]`. \ + `initial_finished` is a bool tensor with shape `[batch_size]`. + """ + pass + + def sample(self, time, outputs, states): + """ + Perform sampling with some strategies according to `outputs`. It is the + partial of `BasicDecoder.step`. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the caller, + representing the current time step number of decoding. + outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + + Returns: + Variable: An `int64` tensor representing the sampled ids. + """ + pass + + def next_inputs(self, time, outputs, states, sample_ids): + r""" + Produce the inputs and states for next time step and give status telling + whether each minibatch entry is finished. It is called after `sample` in + `BasicDecoder.step`. It is the partial of `BasicDecoder.step`. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the caller, + representing the current time step number of decoding. + outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + sample_ids(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `sample_ids` returned by `sample()`. + + Returns: + tuple: A tuple( :code:`(finished, next_inputs, next_states)` ). \ + `next_inputs` and `next_states` both are a (possibly nested \ + structure of) tensor variable[s], and the structure, shape and \ + data type of `next_states` must be same as the input argument \ + `states`. `finished` is a bool tensor with shape `[batch_size]`. + """ + pass + + +class TrainingHelper(DecodeHelper): + """ + TrainingHelper is a subclass of DecodeHelper. It is a decoding helper + slicing from the full sequence inputs as the inputs for corresponding + step. And it uses `argmax` to sample from the outputs of `cell.call()`. + + Since the needs of sequence inputs, it is used mostly for teach-forcing MLE + (maximum likelihood) training, and the sampled would not be used. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import paddle.fluid.layers as layers + trg_emb = fluid.data(name="trg_emb", + shape=[None, None, 128], + dtype="float32") + trg_seq_length = fluid.data(name="trg_seq_length", + shape=[None], + dtype="int64") + helper = layers.TrainingHelper(trg_emb, trg_seq_length) + decoder_cell = layers.GRUCell(hidden_size=128) + decoder = layers.BasicDecoder(decoder_cell, helper) + outputs = layers.dynamic_decode( + decoder, + inits=decoder_cell.get_initial_states(trg_emb), + is_test=False) + """ + + def __init__(self, inputs, sequence_length, time_major=False): + """ + Constructor of TrainingHelper. + + Parameters: + inputs(Variable): A (possibly nested structure of) tensor variable[s]. 
+ The shape of tensor should be `[batch_size, sequence_length, ...]` + for `time_major == False` or `[sequence_length, batch_size, ...]` + for `time_major == True`. It represents the inputs to be sliced + from at every decoding step. + sequence_length(Variable): A tensor with shape `[batch_size]`. + It stores real length of each instance in `inputs`, by which we + can label the finished status of each instance at every decoding + step. + time_major(bool, optional): Indicate the data layout of Tensor included + in `inputs`. If `False`, the data layout would be batch major with + shape `[batch_size, sequence_length, ...]`. If `True`, the data + layout would be time major with shape `[sequence_length, batch_size, ...]`. + Default: `False`. + """ + self.inputs = inputs + self.sequence_length = sequence_length + self.time_major = time_major + # extend inputs to avoid to slice out of range in `next_inputs` + # may be easier and have better performance than condition_op + self.inputs_ = map_structure( + lambda x: nn.pad(x, + paddings=([0, 1] + [0, 0] * (len(x.shape) - 1)) + if time_major else ([0, 0, 0, 1] + [0, 0] * (len(x.shape) - 2))), self.inputs) + + def initialize(self): + r""" + TrainingHelper initialization produces inputs for the first decoding + step by slicing at the first time step of full sequence inputs, and it + gives initial status telling whether each sequence in the batch is + finished. It is the partial of the initialization of `BasicDecoder`. + + Returns: + tuple: A tuple( :code:`(initial_inputs, initial_finished)` ). \ + `initial_inputs` is a (possibly nested structure of) tensor \ + variable[s], and the tensor's shape is `[batch_size, ...]`. \ + `initial_finished` is a bool tensor with shape `[batch_size]`. + """ + init_finished = control_flow.equal(self.sequence_length, + tensor.fill_constant(shape=[1], dtype=self.sequence_length.dtype, value=0)) + # TODO: support zero length + init_inputs = map_structure(lambda x: x[0] if self.time_major else x[:, 0], self.inputs) + return init_inputs, init_finished + + def sample(self, time, outputs, states): + r""" + Perform sampling by using `argmax` according to the `outputs`. Mostly + the sampled ids would not be used since the inputs for next decoding + step would be got by slicing. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the + caller, representing the current time step number of decoding. + outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + + Returns: + Variable: An `int64` tensor with shape `[batch_size]`, representing \ + the sampled ids. + """ + sample_ids = tensor.argmax(outputs, axis=-1) + return sample_ids + + def next_inputs(self, time, outputs, states, sample_ids): + r""" + Generate inputs for the next decoding step by slicing at corresponding + step of the full sequence inputs. Simultaneously, produce the states + for next time step by directly using the input `states` and emit status + telling whether each minibatch entry reaches to the corresponding length. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the + caller, representing the current time step number of decoding. 
+ outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + sample_ids(Variable): An `int64` tensor variable shaped `[batch_size]`. + It is same as `sample_ids` returned by `sample()`. + + Returns: + tuple: A tuple( :code:`(finished, next_inputs, next_states)` ). \ + `next_inputs` and `next_states` both are a (possibly nested \ + structure of) tensor variable[s], and the tensor's shape is \ + `[batch_size, ...]`. `next_states` is identical to the input \ + argument `states`. `finished` is a `bool` Tensor with \ + shape `[batch_size]`. + """ + # TODO: compatibility of int32 and int64 + time = tensor.cast(time, "int32") if convert_dtype(time.dtype) not in ["int32"] else time + if self.sequence_length.dtype != time.dtype: + self.sequence_length = tensor.cast(self.sequence_length, time.dtype) + next_time = time + 1 + finished = control_flow.less_equal(self.sequence_length, next_time) + + def _slice(x): # TODO: use Variable.__getitem__ + axes = [0 if self.time_major else 1] + return nn.squeeze(nn.slice(x, axes=axes, starts=[next_time], ends=[next_time + 1]), axes=axes) + + next_inputs = map_structure(_slice, self.inputs_) + return finished, next_inputs, states + + +class GreedyEmbeddingHelper(DecodeHelper): + """ + GreedyEmbeddingHelper is a subclass of DecodeHelper. It is a decoding helper + uses the argmax of the output (treated as logits) and passes the results + through an embedding layer to get inputs for the next decoding step. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import paddle.fluid.layers as layers + trg_emb = fluid.data(name="trg_emb", + shape=[None, None, 128], + dtype="float32") + + trg_embeder = lambda x: fluid.embedding( + x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding")) + output_layer = lambda x: layers.fc(x, + size=10000, + num_flatten_dims=len(x.shape) - 1, + param_attr=fluid.ParamAttr(name= + "output_w"), + bias_attr=False) + helper = layers.GreedyEmbeddingHelper(trg_embeder, start_tokens=0, end_token=1) + decoder_cell = layers.GRUCell(hidden_size=128) + decoder = layers.BasicDecoder(decoder_cell, helper, output_fn=output_layer) + outputs = layers.dynamic_decode( + decoder=decoder, inits=decoder_cell.get_initial_states(encoder_output)) + """ + + def __init__(self, embedding_fn, start_tokens, end_token): + r""" + Constructor of GreedyEmbeddingHelper. + + Parameters: + embedding_fn(callable): A functor to apply on the argmax results. + Mostly it is an embedding layer to transform ids to embeddings. + **Note that fluid.embedding should be used here rather than + fluid.layers.embedding, since shape of ids is [batch_size]. + when using fluid.layers.embedding, must unsqueeze in embedding_fn.** + start_tokens(Variable): A `int64` tensor shaped `[batch_size]`, + representing the start tokens. + end_token(int): The end token id. + + Returns: + tuple: A tuple( :code:`(initial_inputs, initial_states, finished)` ). \ + `initial_inputs` and `initial_states` both are a (possibly nested \ + structure of) tensor variable[s], and `finished` is a tensor with \ + bool data type. 
+ """ + self.embedding_fn = embedding_fn + self.start_tokens = start_tokens + self.end_token = tensor.fill_constant(shape=[1], dtype="int64", value=end_token) + + def initialize(self): + r""" + GreedyEmbeddingHelper initialization produces inputs for the first decoding + step by using `start_tokens` of the constructor, and gives initial + status telling whether each sequence in the batch is finished. + It is the partial of the initialization of `BasicDecoder`. + + Returns: + tuple: A tuple( :code:`(initial_inputs, initial_finished)` ). \ + `initial_inputs` is same as `start_tokens` of the constructor. \ + `initial_finished` is a `bool` tensor filled by False and has \ + the same shape as `start_tokens`. + """ + # TODO: remove the restriction of force_cpu + init_finished = tensor.fill_constant_batch_size_like(input=self.start_tokens, + shape=[-1], + dtype="bool", + value=False, + force_cpu=True) + init_inputs = self.embedding_fn(self.start_tokens) + return init_inputs, init_finished + + def sample(self, time, outputs, states): + r""" + Perform sampling by using `argmax` according to the `outputs`. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the + caller, representing the current time step number of decoding. + outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + + Returns: + Variable: An `int64` tensor with shape `[batch_size]`, representing \ + the sampled ids. + """ + sample_ids = tensor.argmax(outputs, axis=-1) + return sample_ids + + def next_inputs(self, time, outputs, states, sample_ids): + r""" + Generate inputs for the next decoding step by applying `embedding_fn` + to `sample_ids`. Simultaneously, produce the states for next time step + by directly using the input `states` and emit status telling whether + each minibatch entry gets an `end_token` sample. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the + caller, representing the current time step number of decoding. + outputs(Variable): A tensor variable. Usually it's data type is float32 + or float64, and it's shape is `[batch_size, vocabulary_size]`, + representing the predicted logits of current step. It is same as + `outputs` returned by `BasicDecoder.output_fn(BasicDecoder.cell.call())`. + states(Variable): A (possibly nested structure of) tensor variable[s]. + It is same as `new_states` returned by `BasicDecoder.cell.call()`. + sample_ids(Variable): An `int64` tensor variable shaped `[batch_size]`. + It is same as `sample_ids` returned by `sample()`. + + Returns: + tuple: A tuple( :code:`(finished, next_inputs, next_states)` ). \ + `next_inputs` and `next_states` both are a (possibly nested \ + structure of) tensor variable[s], and the tensor's shape is \ + `[batch_size, ...]`. `next_states` is identical to the input \ + argument `states`. `finished` is a `bool` Tensor with \ + shape `[batch_size]`. 
+ """ + finished = control_flow.equal(sample_ids, self.end_token) + next_inputs = self.embedding_fn(sample_ids) + return finished, next_inputs, states + + +class BasicDecoder(Decoder): + """ + BasicDecoder is a subclass of Decoder and assembles a RNNCell and DecodeHelper + instance as members, where the DecodeHelper helps to implement customed + decoding strategies.. It performs one decoding step as following steps: + + 1. Perform `cell_outputs, cell_states = cell.call(inputs, states)` + to get outputs and new states from cell. + + 2. Perform `sample_ids = helper.sample(time, cell_outputs, cell_states)` + to sample ids as decoded results of the current time step. + + 3. Perform `finished, next_inputs, next_states = helper.next_inputs(time, + cell_outputs, cell_states, sample_ids)` to generate inputs, states and + finished status for the next decoding step. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import paddle.fluid.layers as layers + trg_emb = fluid.data(name="trg_emb", + shape=[None, None, 128], + dtype="float32") + + trg_embeder = lambda x: fluid.embedding( + x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding")) + output_layer = lambda x: layers.fc(x, + size=10000, + num_flatten_dims=len(x.shape) - 1, + param_attr=fluid.ParamAttr(name= + "output_w"), + bias_attr=False) + helper = layers.SampleEmbeddingHelper(trg_embeder, start_tokens=0, end_token=1) + decoder_cell = layers.GRUCell(hidden_size=128) + decoder = layers.BasicDecoder(decoder_cell, helper, output_fn=output_layer) + outputs = layers.dynamic_decode( + decoder=decoder, inits=decoder_cell.get_initial_states(encoder_output)) + """ + + def __init__(self, cell, helper, output_fn=None): + """ + Constructor of BasicDecoder. + + Parameters: + cell(RNNCell): An instance of `RNNCell` or object with the same interface. + helper(DecodeHelper): An instance of `DecodeHelper`. + output_fn(optional): A callable to apply to the cell's output prior to + sampling. Default None. + """ + self.cell = cell + self.helper = helper + self.output_fn = output_fn + + def initialize(self, initial_cell_states): + r""" + BasicDecoder initialization includes helper initialization and cell + initialization, and cell initialization uses `initial_cell_states` as + the result directly. + + Parameters: + initial_cell_states(Variable): A (possibly nested structure of) + tensor variable[s]. An argument provided by the caller `dynamic_decode`. + + Returns: + tuple: A tuple( :code:(initial_inputs, initial_cell_states, finished)` ). \ + `initial_inputs` and `initial_states` both are a (possibly nested \ + structure of) tensor variable[s], and `finished` is a tensor with \ + bool data type. `initial_inputs` and `finished` are the results \ + of `helper.initialize()`, and `initial_cell_states` is same as \ + the input argument counterpart. + """ + (initial_inputs, initial_finished) = self.helper.initialize() + return initial_inputs, initial_cell_states, initial_finished + + class OutputWrapper(collections.namedtuple("OutputWrapper", ("cell_outputs", "sample_ids"))): + """ + The structure for the returned value `outputs` of `decoder.step`. + A namedtuple includes cell_outputs, sample_ids as fields. + """ + pass + + def step(self, time, inputs, states, **kwargs): + r""" + Perform one decoding step as following steps: + + 1. Perform `cell_outputs, cell_states = cell.call(inputs, states)` + to get outputs and new states from cell. + + 2. 
Perform `sample_ids = helper.sample(time, cell_outputs, cell_states)` + to sample ids as decoded results of the current time step. + + 3. Perform `finished, next_inputs, next_states = helper.next_inputs(time, + cell_outputs, cell_states, sample_ids)` to generate inputs, states and + finished status for the next decoding step. + + Parameters: + time(Variable): An `int64` tensor with shape `[1]` provided by the caller, + representing the current time step number of decoding. + inputs(Variable): A tensor variable. It is same as `initial_inputs` + returned by `initialize()` for the first decoding step and + `next_inputs` returned by `step()` for the others. + states(Variable): A structure of tensor variables. + It is same as the `initial_cell_states` returned by `initialize()` + for the first decoding step and `next_states` returned by + `step()` for the others. + **kwargs: Additional keyword arguments, provided by the caller + `dynamic_decode`. + + Returns: + tuple: A tuple( :code:`(outputs, next_states, next_inputs, finished)` ). \ + `outputs` is a namedtuple(including cell_outputs, sample_ids, \ + as fields) of tensor variables, where `cell_outputs` is the result \ + fof `cell.call()` and `sample_ids` is the result of `helper.sample()`. \ + `next_states` and `next_inputs` have the same structure, shape \ + and data type as the input arguments `states` and `inputs` separately. \ + `finished` is a `bool` tensor with shape `[batch_size]`. + """ + cell_outputs, cell_states = self.cell(inputs, states, **kwargs) + if self.output_fn is not None: + cell_outputs = self.output_fn(cell_outputs) + sample_ids = self.helper.sample(time=time, outputs=cell_outputs, states=cell_states) + sample_ids.stop_gradient = True + (finished, next_inputs, next_states) = self.helper.next_inputs(time=time, + outputs=cell_outputs, + states=cell_states, + sample_ids=sample_ids) + outputs = self.OutputWrapper(cell_outputs, sample_ids) + return (outputs, next_states, next_inputs, finished) + + +def beam_search(pre_ids, + pre_scores, + ids, + scores, + beam_size, + end_id, + level=0, + is_accumulated=True, + name=None, + return_parent_idx=False): + r""" + + Beam search is a classical algorithm for selecting candidate words in a + machine translation task. + + Refer to `Beam search `_ + for more details. + + **This operator only supports LoDTensor.** It is used after finishing + scores calculation to perform beam search for one time step. Specifically, + after ``ids`` and ``scores`` have been produced, it selects the top-K + ( `k` is ``beam_size`` ) candidate word ids of current step from ``ids`` + according to the corresponding ``scores``. Additionally, ``pre_id`` and + ``pre_scores`` are the output of `beam_search` at previous step, they + are needed for special use to handle ended candidate translations. + + Note that if ``is_accumulated`` is True, the ``scores`` passed in should + be accumulated scores. Otherwise, the ``scores`` are + considered as the probabilities of single step and would be transformed to + the log field and added up with ``pre_scores`` for final scores in this + operator. Length penalty should be done with extra operators before calculating + the accumulated scores if needed. + + Please see the following demo for a fully beam search usage example: + + fluid/tests/book/test_machine_translation.py + + Args: + pre_ids(Variable): A LodTensor variable (lod level is 2), representing + the selected ids of previous step. It is the output of beam_search + at previous step. 
Its shape is `[batch_size, 1]` and its lod is + `[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at the + first step. The data type should be int64. + pre_scores(Variable): A LodTensor variable has the same shape and lod + with ``pre_ids`` , representing the accumulated scores corresponding + to the selected ids of previous step. It is the output of + beam_search at previous step. The data type should be float32 or float64. + ids(Variable|None): A LodTensor variable containing the candidates ids. + It has the same lod with ``pre_ids`` and its shape should be + `[batch_size * beam_size, K]`, where `K` supposed to be greater than + ``beam_size`` and the first dimension size (decrease as samples reach + to the end) should be same as that of ``pre_ids`` . The data type + should be int64. It can be None, which use index in ``scores`` as + ids. + scores(Variable): A LodTensor variable containing the accumulated + scores corresponding to ``ids`` . Both its shape and lod are same as + those of ``ids`` . The data type should be float32 or float64. + beam_size(int): The beam width used in beam search. + end_id(int): The id of end token. + level(int): **It can be ignored and mustn't change currently.** + The 2 level lod used in this operator has the following + meaning: The first level describes how many beams each sample has, + which would change to 0 when beams of the sample all end (batch reduce); + The second level describes how many times each beam is selected. + Default 0, which shouldn't be changed currently. + is_accumulated(bool): Whether the input ``score`` is accumulated scores. + Default True. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + return_parent_idx(bool, optional): Whether to return an extra Tensor variable + in output, which stores the selected ids' parent index in + ``pre_ids`` and can be used to update RNN's states by gather operator. + Default False. + + Returns: + tuple: The tuple contains two or three LodTensor variables. The two LodTensor, \ + representing the selected ids and the corresponding accumulated scores of \ + current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \ + and have data types int64 and float32. If ``return_parent_idx`` is True, \ + an extra Tensor variable preserving the selected ids' parent index \ + is included, whose shape is `[batch_size * beam_size]` and data type \ + is int64. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import paddle + paddle.enable_static() + + # Suppose `probs` contains predicted results from the computation + # cell and `pre_ids` and `pre_scores` is the output of beam_search + # at previous step. 
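+ # The snippet below keeps the top-k candidates of the current step,
+ # accumulates their log probabilities onto `pre_scores`, and then lets
+ # beam_search select `beam_size` of them per source sequence.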
+ beam_size = 4 + end_id = 1 + pre_ids = fluid.data( + name='pre_id', shape=[None, 1], lod_level=2, dtype='int64') + pre_scores = fluid.data( + name='pre_scores', shape=[None, 1], lod_level=2, dtype='float32') + probs = fluid.data( + name='probs', shape=[None, 10000], dtype='float32') + topk_scores, topk_indices = fluid.layers.topk(probs, k=beam_size) + accu_scores = fluid.layers.elementwise_add( + x=fluid.layers.log(x=topk_scores), + y=fluid.layers.reshape(pre_scores, shape=[-1]), + axis=0) + selected_ids, selected_scores = fluid.layers.beam_search( + pre_ids=pre_ids, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=beam_size, + end_id=end_id) + """ + check_variable_and_dtype(pre_ids, 'pre_ids', ['int64'], 'beam_search') + check_variable_and_dtype(pre_scores, 'pre_scores', ['float32', 'float64'], 'beam_search') + check_type(ids, 'ids', (Variable, type(None)), 'beam_search') + check_variable_and_dtype(scores, 'scores', ['float32', 'float64'], 'beam_search') + helper = LayerHelper('beam_search', **locals()) + score_type = pre_scores.dtype + id_type = pre_ids.dtype + + inputs = {"pre_ids": pre_ids, "pre_scores": pre_scores, "scores": scores} + if ids is not None: + inputs["ids"] = ids + + selected_scores = helper.create_variable_for_type_inference(dtype=score_type) + selected_ids = helper.create_variable_for_type_inference(dtype=id_type) + # parent_idx is a tensor used to gather cell states at the next time + # step. Though lod in selected_ids can also be used to gather by + # sequence_expand, it is not efficient. + # gather_op's index input only supports int32 dtype currently + parent_idx = helper.create_variable_for_type_inference(dtype="int32") + + helper.append_op( + type='beam_search', + inputs=inputs, + outputs={ + 'selected_ids': selected_ids, + 'selected_scores': selected_scores, + 'parent_idx': parent_idx + }, + attrs={ + # TODO(ChunweiYan) to assure other value support + 'level': level, + 'beam_size': beam_size, + 'end_id': end_id, + 'is_accumulated': is_accumulated, + }) + if return_parent_idx: + return selected_ids, selected_scores, parent_idx + else: + return selected_ids, selected_scores + + +def beam_search_decode(ids, scores, beam_size, end_id, name=None): + r""" + + This operator is used after beam search has completed. It constructs the + full predicted sequences for each sample by walking back along the search + paths stored in lod of ``ids`` . The result sequences are stored in a + LoDTensor, which uses the following way to parse: + + .. code-block:: text + + If lod = [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]] + + The first level of lod stands for: There are 2 samples each having 3 + (beam width) predicted sequence. + + The second level of lod stands for: The lengths of the first sample's + 3 predicted sequences are 12, 12, 16; The lengths of the second sample's + 3 predicted sequences are 14, 13, 15. + + + Please see the following demo for a fully beam search usage example: + fluid/tests/book/test_machine_translation.py + + Args: + ids(Variable): The LoDTensorArray variable containing the selected ids + of all steps. Each LoDTensor in it has int64 data type and 2 level + lod which can be used to get the search paths. + scores(Variable): The LodTensorArray variable containing the accumulated + scores corresponding to selected ids of all steps. It has the same size + as ``ids`` . Each LoDTensor in it has the same shape and lod as the + counterpart in ``ids`` , and has a float32 data type. + beam_size(int): The beam width used in beam search. 
+ end_id(int): The id of end token. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + tuple: The tuple contains two LodTensor variables. The two LodTensor, \ + containing the full sequences of ids and the corresponding accumulated \ + scores, have the same shape flattened to 1D and have the same 2 level \ + lod. The lod can be used to get how many predicted sequences each sample \ + has and how many ids each predicted sequence has. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + import paddle + paddle.enable_static() + # Suppose `ids` and `scores` are LodTensorArray variables reserving + # the selected ids and scores of all steps + ids = fluid.layers.create_array(dtype='int64') + scores = fluid.layers.create_array(dtype='float32') + finished_ids, finished_scores = fluid.layers.beam_search_decode( + ids, scores, beam_size=5, end_id=0) + """ + check_variable_and_dtype(ids, 'ids', ['int64'], 'beam_search_encode') + check_variable_and_dtype(scores, 'scores', ['float32'], 'beam_search_encode') + helper = LayerHelper('beam_search_decode', **locals()) + sentence_ids = helper.create_variable_for_type_inference(dtype=ids.dtype) + sentence_scores = helper.create_variable_for_type_inference(dtype=scores.dtype) + + helper.append_op(type="beam_search_decode", + inputs={ + "Ids": ids, + "Scores": scores + }, + outputs={ + "SentenceIds": sentence_ids, + "SentenceScores": sentence_scores + }, + attrs={ + "beam_size": beam_size, + "end_id": end_id + }) + + return sentence_ids, sentence_scores diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/clip_vision_transformer.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/clip_vision_transformer.py new file mode 100755 index 000000000..c9d3c855a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/clip_vision_transformer.py @@ -0,0 +1,412 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Layer +from paddle.nn import Linear +from paddle.nn.initializer import Constant +from paddle.nn.initializer import Normal +from paddle.nn.initializer import TruncatedNormal + +# from .base_transformer import QuickGELU + +__all__ = [ + "VisionTransformer", "ViT_small_patch16_224", "ViT_base_patch16_224", "ViT_base_patch16_384", + "ViT_base_patch32_224", "ViT_base_patch32_384", "ViT_large_patch16_224", "ViT_large_patch16_384", + "ViT_large_patch32_384", "ViT_huge_patch16_224", "ViT_huge_patch32_384", "ViT_large_patch14_224" +] + +trunc_normal_ = TruncatedNormal(std=.02) +zeros_ = Constant(value=0.) +ones_ = Constant(value=1.) 
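+# Shared parameter initializer functors: truncated-normal for Linear weights
+# and the class/positional embeddings, constant 0/1 for biases and LayerNorm
+# weights (applied in VisionTransformer._init_weights below).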
+ + +class QuickGELU(Layer): + """ GELU """ + + def forward(self, x): + return x * F.sigmoid(1.702 * x) + + +def to_2tuple(x): + return tuple([x] * 2) + + +def drop_path(x, drop_prob=0., training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... + """ + if drop_prob == 0. or not training: + return x + keep_prob = paddle.to_tensor(1 - drop_prob) + shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) + random_tensor = paddle.floor(random_tensor) # binarize + output = x.divide(keep_prob) * random_tensor + return output + + +class DropPath(nn.Layer): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Identity(nn.Layer): + + def __init__(self): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +class Mlp(nn.Layer): + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Layer): + + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + # B= paddle.shape(x)[0] + N, C = x.shape[1:] + qkv = self.qkv(x).reshape((-1, N, 3, self.num_heads, C // self.num_heads)).transpose((2, 0, 3, 1, 4)) + q, k, v = qkv[0], qkv[1], qkv[2] + + attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale + attn = nn.functional.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((-1, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Layer): + + def __init__(self, + dim, + num_heads, + mlp_ratio=4., + qkv_bias=False, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=QuickGELU, + norm_layer='nn.LayerNorm', + epsilon=1e-5): + super().__init__() + self.norm1 = eval(norm_layer)(dim, epsilon=epsilon) + self.attn = Attention(dim, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else Identity() + self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + def forward(self, x): + x = x + self.drop_path(self.attn(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Layer): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * \ + (img_size[0] // patch_size[0]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + + self.proj = nn.Conv2D(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias_attr=False) + + def forward(self, x): + B, C, H, W = x.shape + assert H == self.img_size[0] and W == self.img_size[1], \ + "Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." + + x = self.proj(x).flatten(2).transpose((0, 2, 1)) + return x + + +class VisionTransformer(nn.Layer): + """ Vision Transformer with support for patch input + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_chans=3, + class_dim=0, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=False, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_layer='nn.LayerNorm', + epsilon=1e-5, + **args): + super().__init__() + self.class_dim = class_dim + + self.num_features = self.embed_dim = embed_dim + + self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) + num_patches = self.patch_embed.num_patches + + scale = embed_dim**-0.5 + self.class_embedding = self.create_parameter(shape=(1, 1, embed_dim), default_initializer=Normal(std=scale)) + self.positional_embedding = self.create_parameter(shape=(1, num_patches + 1, embed_dim), + default_initializer=Normal(std=scale)) + self.add_parameter("positional_embedding", self.positional_embedding) + self.add_parameter("class_embedding", self.class_embedding) + self.pos_drop = nn.Dropout(p=drop_rate) + + dpr = np.linspace(0, drop_path_rate, depth) + + self.norm_pre = eval(norm_layer)(embed_dim, epsilon=epsilon) + + self.blocks = nn.LayerList([ + Block(dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[i], + norm_layer=norm_layer, + epsilon=epsilon) for i in range(depth) + ]) + + self.norm_post = eval(norm_layer)(embed_dim, epsilon=epsilon) + + ## Classifier head + #self.head = nn.Linear(embed_dim, + # class_dim) if class_dim > 0 else Identity() + + trunc_normal_(self.positional_embedding) + trunc_normal_(self.class_embedding) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + # B = x.shape[0] + B = paddle.shape(x)[0] + x = self.patch_embed(x) + class_embedding = self.class_embedding.expand((B, -1, -1)) + x = paddle.concat((class_embedding, x), axis=1) + x = x + self.positional_embedding + x = self.pos_drop(x) + x = self.norm_pre(x) + for blk in self.blocks: + x = blk(x) + + #x = self.norm_post(x[:, 0, :]) + x = self.norm_post(x) + # x 
= self.classfy(x) + return x + + def forward(self, x): + x = self.forward_features(x) + return x + + +def ViT_small_patch16_224(**kwargs): + model = VisionTransformer(patch_size=16, + embed_dim=768, + depth=8, + num_heads=8, + mlp_ratio=3, + qk_scale=768**-0.5, + **kwargs) + return model + + +def ViT_base_patch16_224(**kwargs): + model = VisionTransformer(patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_base_patch16_384(**kwargs): + model = VisionTransformer(img_size=384, + patch_size=16, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_base_patch32_384(**kwargs): + model = VisionTransformer(img_size=384, + patch_size=32, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_base_patch32_224(**kwargs): + model = VisionTransformer(img_size=224, + patch_size=32, + embed_dim=768, + depth=12, + num_heads=12, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_large_patch16_224(**kwargs): + model = VisionTransformer(patch_size=16, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_large_patch16_384(**kwargs): + model = VisionTransformer(img_size=384, + patch_size=16, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_large_patch14_224(**kwargs): + model = VisionTransformer(patch_size=14, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_large_patch32_384(**kwargs): + model = VisionTransformer(img_size=384, + patch_size=32, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + epsilon=1e-6, + **kwargs) + return model + + +def ViT_huge_patch16_224(**kwargs): + model = VisionTransformer(patch_size=16, embed_dim=1280, depth=32, num_heads=16, mlp_ratio=4, **kwargs) + return model + + +def ViT_huge_patch32_384(**kwargs): + model = VisionTransformer(img_size=384, + patch_size=32, + embed_dim=1280, + depth=32, + num_heads=16, + mlp_ratio=4, + **kwargs) + return model diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/droppath.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/droppath.py new file mode 100755 index 000000000..1b2e8b731 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/droppath.py @@ -0,0 +1,59 @@ +# Copyright (c) 2021 PPViT Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Droppath, reimplement from https://github.com/yueatsprograms/Stochastic_Depth +""" +import paddle +import paddle.nn as nn + + +class DropPath(nn.Layer): + """DropPath class""" + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def drop_path(self, inputs): + """drop path op + Args: + input: tensor with arbitrary shape + drop_prob: float number of drop path probability, default: 0.0 + training: bool, if current mode is training, default: False + Returns: + output: output tensor after drop path + """ + # if prob is 0 or eval mode, return original input + if self.drop_prob == 0. or not self.training: + return inputs + keep_prob = 1 - self.drop_prob + keep_prob = paddle.to_tensor(keep_prob, dtype='float32') + shape = (inputs.shape[0], ) + (1, ) * (inputs.ndim - 1) # shape=(N, 1, 1, 1) + random_tensor = keep_prob + paddle.rand(shape, dtype=inputs.dtype) + random_tensor = random_tensor.floor() # mask + output = inputs.divide(keep_prob) * random_tensor #divide is to keep same output expectation + return output + + def forward(self, inputs): + return self.drop_path(inputs) + + +#def main(): +# tmp = paddle.to_tensor(np.random.rand(8, 16, 8, 8), dtype='float32') +# dp = DropPath(0.5) +# out = dp(tmp) +# print(out) +# +#if __name__ == "__main__": +# main() diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/efficientnet.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/efficientnet.py new file mode 100755 index 000000000..e1bfc2bf9 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/efficientnet.py @@ -0,0 +1,836 @@ +import collections +import copy +import math +import os +import re + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D +from paddle.nn import AvgPool2D +from paddle.nn import BatchNorm +from paddle.nn import Conv2D +from paddle.nn import Dropout +from paddle.nn import Linear +from paddle.nn import MaxPool2D + +MODEL_URLS = { + "EfficientNetB0_small": + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams", + "EfficientNetB0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams", + "EfficientNetB1": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams", + "EfficientNetB2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams", + "EfficientNetB3": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams", + "EfficientNetB4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams", + "EfficientNetB5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams", + "EfficientNetB6": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams", + "EfficientNetB7": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', +]) + +BlockArgs = 
collections.namedtuple( + 'BlockArgs', + ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) + + +def load_dygraph_pretrain(model, path=None): + if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + param_state_dict = paddle.load(path + ".pdparams") + model.set_dict(param_state_dict) + return + + +def efficientnet_params(model_name): + """ Map EfficientNet model name to parameter coefficients. """ + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + } + return params_dict[model_name] + + +def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2, drop_connect_rate=0.2): + """ Get block arguments according to parameter and coefficients. """ + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams(batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None) + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not pre-defined: %s' % model_name) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +def round_filters(filters, global_params): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """ Round number of filters based on depth multiplier. """ + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +class BlockDecoder(object): + """ + Block Decoder, straight from the official TensorFlow repository. + """ + + @staticmethod + def _decode_block_string(block_string): + """ Gets a block through a string notation of arguments. 
""" + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + cond_1 = ('s' in options and len(options['s']) == 1) + cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) + assert (cond_1 or cond_2) + + return BlockArgs(kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """ + Decode a list of string notations to specify blocks in the network. + string_list: list of strings, each string is a notation of block + return + list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """ + Encodes a list of BlockArgs to a list of strings. + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def initial_type(name, use_bias=False): + param_attr = ParamAttr(name=name + "_weights") + if use_bias: + bias_attr = ParamAttr(name=name + "_offset") + else: + bias_attr = False + return param_attr, bias_attr + + +def init_batch_norm_layer(name="batch_norm"): + param_attr = ParamAttr(name=name + "_scale") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def init_fc_layer(name="fc"): + param_attr = ParamAttr(name=name + "_weights") + bias_attr = ParamAttr(name=name + "_offset") + return param_attr, bias_attr + + +def cal_padding(img_size, stride, filter_size, dilation=1): + """Calculate padding size.""" + # out_size = max(filter_size - stride, 0) + if img_size % stride == 0: + out_size = max(filter_size - stride, 0) + else: + out_size = max(filter_size - (img_size % stride), 0) + return out_size // 2, out_size - out_size // 2 + + +# inp_shape = { +# "b0_small": [224, 112, 112, 56, 28, 14, 14, 7], +# "b0": [224, 112, 112, 56, 28, 14, 14, 7], +# "b1": [240, 120, 120, 60, 30, 15, 15, 8], +# "b2": [260, 130, 130, 65, 33, 17, 17, 9], +# "b3": [300, 150, 150, 75, 38, 19, 19, 10], +# "b4": [380, 190, 190, 95, 48, 24, 24, 12], +# "b5": [456, 228, 228, 114, 57, 29, 29, 15], +# "b6": [528, 264, 264, 132, 66, 33, 33, 17], +# "b7": [600, 300, 300, 150, 75, 38, 38, 19] +# } +inp_shape = { + "b0_small": [224, 112, 112, 56, 28, 14, 14, 7], + "b0": [224, 112, 112, 56, 28, 14, 14, 7], + "b1": [240, 120, 120, 60, 30, 15, 15, 8], + "b2": [260, 130, 130, 65, 33, 17, 17, 9], + "b3": [300, 150, 150, 75, 38, 19, 19, 10], + 
"b4": [380, 190, 190, 95, 48, 24, 24, 12], + "b5": [256, 128, 128, 64, 32, 16, 16, 8], + "b6": [528, 264, 264, 132, 66, 33, 33, 17], + "b7": [600, 300, 300, 150, 75, 38, 38, 19] +} + + +def _drop_connect(inputs, prob, is_test): + if is_test: + output = inputs + else: + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = paddle.multiply(inputs, binary_tensor) / keep_prob + return output + + +class Conv2ds(nn.Layer): + + def __init__(self, + input_channels, + output_channels, + filter_size, + stride=1, + padding=0, + groups=None, + name="conv2d", + act=None, + use_bias=False, + padding_type=None, + model_name=None, + cur_stage=None): + super(Conv2ds, self).__init__() + assert act in [None, "swish", "sigmoid"] + self.act = act + + param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) + + self.padding_type = padding_type + self.stride = stride + self.filter_size = filter_size + + def get_padding(filter_size, stride=1, dilation=1): + padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 + return padding + + inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] + self.need_crop = False + if padding_type == "SAME": + top_padding, bottom_padding = cal_padding(inps, stride, filter_size) + left_padding, right_padding = cal_padding(inps, stride, filter_size) + height_padding = bottom_padding + width_padding = right_padding + if top_padding != bottom_padding or left_padding != right_padding: + height_padding = top_padding + stride + width_padding = left_padding + stride + self.need_crop = True + padding = [height_padding, width_padding] + elif padding_type == "VALID": + height_padding = 0 + width_padding = 0 + padding = [height_padding, width_padding] + elif padding_type == "DYNAMIC": + padding = get_padding(filter_size, stride) + else: + padding = padding_type + + groups = 1 if groups is None else groups + print('------') + print(input_channels) + print(output_channels) + print(filter_size) + print('------') + self._conv = Conv2D( + input_channels, + output_channels, + filter_size, + groups=groups, + stride=stride, + # act=act, + padding=padding, + weight_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs): + x = self._conv(inputs) + if self.act == "swish": + x = F.swish(x) + elif self.act == "sigmoid": + x = F.sigmoid(x) + + if self.need_crop: + x = x[:, :, 1:, 1:] + return x + + +class ConvBNLayer(nn.Layer): + + def __init__(self, + input_channels, + filter_size, + output_channels, + stride=1, + num_groups=1, + padding_type="SAME", + conv_act=None, + bn_act="swish", + use_bn=True, + use_bias=False, + name=None, + conv_name=None, + bn_name=None, + model_name=None, + cur_stage=None): + super(ConvBNLayer, self).__init__() + self._conv = Conv2ds(input_channels=input_channels, + output_channels=output_channels, + filter_size=filter_size, + stride=stride, + groups=num_groups, + act=conv_act, + padding_type=padding_type, + name=conv_name, + use_bias=use_bias, + model_name=model_name, + cur_stage=cur_stage) + self.use_bn = use_bn + if use_bn is True: + bn_name = name + bn_name + param_attr, bias_attr = init_batch_norm_layer(bn_name) + self._bn = BatchNorm(num_channels=output_channels, + act=bn_act, + momentum=0.99, + epsilon=0.001, + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + param_attr=param_attr, + bias_attr=bias_attr) + + def forward(self, inputs): + 
print('ConvBNLayer:') + if self.use_bn: + x = self._conv(inputs) + x = self._bn(x) + print(x.shape) + print('-------') + return x + else: + return self._conv(inputs) + + +class ExpandConvNorm(nn.Layer): + + def __init__(self, input_channels, block_args, padding_type, name=None, model_name=None, cur_stage=None): + super(ExpandConvNorm, self).__init__() + + self.oup = block_args.input_filters * block_args.expand_ratio + self.expand_ratio = block_args.expand_ratio + + if self.expand_ratio != 1: + self._conv = ConvBNLayer(input_channels, + 1, + self.oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_expand_conv", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + if self.expand_ratio != 1: + return self._conv(inputs) + else: + return inputs + + +class DepthwiseConvNorm(nn.Layer): + + def __init__(self, input_channels, block_args, padding_type, name=None, model_name=None, cur_stage=None): + super(DepthwiseConvNorm, self).__init__() + + self.k = block_args.kernel_size + self.s = block_args.stride + if isinstance(self.s, list) or isinstance(self.s, tuple): + self.s = self.s[0] + oup = block_args.input_filters * block_args.expand_ratio + self._conv = ConvBNLayer(input_channels, + self.k, + oup, + self.s, + num_groups=input_channels, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_depthwise_conv", + bn_name="_bn1", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class ProjectConvNorm(nn.Layer): + + def __init__(self, input_channels, block_args, padding_type, name=None, model_name=None, cur_stage=None): + super(ProjectConvNorm, self).__init__() + + final_oup = block_args.output_filters + + self._conv = ConvBNLayer(input_channels, + 1, + final_oup, + bn_act=None, + padding_type=padding_type, + name=name, + conv_name=name + "_project_conv", + bn_name="_bn2", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class SEBlock(nn.Layer): + + def __init__(self, + input_channels, + num_squeezed_channels, + oup, + padding_type, + name=None, + model_name=None, + cur_stage=None): + super(SEBlock, self).__init__() + + self._pool = AdaptiveAvgPool2D(1) + self._conv1 = Conv2ds(input_channels, + num_squeezed_channels, + 1, + use_bias=True, + padding_type=padding_type, + act="swish", + name=name + "_se_reduce") + + self._conv2 = Conv2ds( + num_squeezed_channels, + oup, + 1, + # act="sigmoid", + act=None, + use_bias=True, + padding_type=padding_type, + name=name + "_se_expand") + + def forward(self, inputs): + x = self._pool(inputs) + x = self._conv1(x) + x = self._conv2(x) + out = paddle.multiply(inputs, F.sigmoid(x)) + return out + + +class MbConvBlock(nn.Layer): + + def __init__(self, + input_channels, + block_args, + padding_type, + use_se, + name=None, + drop_connect_rate=None, + model_name=None, + cur_stage=None): + super(MbConvBlock, self).__init__() + + oup = block_args.input_filters * block_args.expand_ratio + self.block_args = block_args + self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip + self.expand_ratio = block_args.expand_ratio + self.drop_connect_rate = drop_connect_rate + + if self.expand_ratio != 1: + self._ecn = ExpandConvNorm(input_channels, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._dcn = 
DepthwiseConvNorm(input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + if self.has_se: + num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) + self._se = SEBlock(input_channels * block_args.expand_ratio, + num_squeezed_channels, + oup, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + self._pcn = ProjectConvNorm(input_channels * block_args.expand_ratio, + block_args, + padding_type=padding_type, + name=name, + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + x = inputs + if self.expand_ratio != 1: + x = self._ecn(x) + x = F.swish(x) + x = self._dcn(x) + x = F.swish(x) + if self.has_se: + x = self._se(x) + x = self._pcn(x) + if self.id_skip and \ + self.block_args.stride == 1 and \ + self.block_args.input_filters == self.block_args.output_filters: + if self.drop_connect_rate: + x = _drop_connect(x, self.drop_connect_rate, not self.training) + x = paddle.add(x, inputs) + return x + + +class ConvStemNorm(nn.Layer): + + def __init__(self, input_channels, padding_type, _global_params, name=None, model_name=None, cur_stage=None): + super(ConvStemNorm, self).__init__() + + output_channels = round_filters(32, _global_params) + + self._conv = ConvBNLayer(input_channels, + filter_size=3, + output_channels=output_channels, + stride=2, + bn_act=None, + padding_type=padding_type, + name="", + conv_name="_conv_stem", + bn_name="_bn0", + model_name=model_name, + cur_stage=cur_stage) + + def forward(self, inputs): + return self._conv(inputs) + + +class ExtractFeatures(nn.Layer): + + def __init__(self, input_channels, _block_args, _global_params, padding_type, use_se, model_name=None): + super(ExtractFeatures, self).__init__() + + self._global_params = _global_params + + self._conv_stem = ConvStemNorm(input_channels, + padding_type=padding_type, + _global_params=_global_params, + model_name=model_name, + cur_stage=0) + + self.block_args_copy = copy.deepcopy(_block_args) + idx = 0 + block_size = 0 + for block_arg in self.block_args_copy: + block_arg = block_arg._replace(input_filters=round_filters(block_arg.input_filters, _global_params), + output_filters=round_filters(block_arg.output_filters, _global_params), + num_repeat=round_repeats(block_arg.num_repeat, _global_params)) + block_size += 1 + for _ in range(block_arg.num_repeat - 1): + block_size += 1 + + self.conv_seq = [] + cur_stage = 1 + for block_args in _block_args: + block_args = block_args._replace(input_filters=round_filters(block_args.input_filters, _global_params), + output_filters=round_filters(block_args.output_filters, _global_params), + num_repeat=round_repeats(block_args.num_repeat, _global_params)) + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "_blocks." + str(idx) + ".", + MbConvBlock(block_args.input_filters, + block_args=block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." 
+ str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / block_size + _mc_block = self.add_sublayer( + "block." + str(idx) + ".", + MbConvBlock(block_args.input_filters, + block_args, + padding_type=padding_type, + use_se=use_se, + name="_blocks." + str(idx) + ".", + drop_connect_rate=drop_connect_rate, + model_name=model_name, + cur_stage=cur_stage)) + self.conv_seq.append(_mc_block) + idx += 1 + cur_stage += 1 + + def forward(self, inputs): + print('ExtractFeatures:') + print(inputs.shape) + x = self._conv_stem(inputs) + print(x.shape) + print('******') + x = F.swish(x) + for _mc_block in self.conv_seq: + x = _mc_block(x) + return x + + +class EfficientNet(nn.Layer): + + def __init__(self, name="b0", padding_type="SAME", override_params=None, use_se=True, class_num=768): + super(EfficientNet, self).__init__() + + model_name = 'efficientnet-' + name + self.name = name + self._block_args, self._global_params = get_model_params(model_name, override_params) + self.padding_type = padding_type + self.use_se = use_se + + self._ef = ExtractFeatures(3, + self._block_args, + self._global_params, + self.padding_type, + self.use_se, + model_name=self.name) + + output_channels = round_filters(1280, self._global_params) + + if name == "b0_small" or name == "b0" or name == "b1": + oup = 320 + elif name == "b2": + oup = 352 + elif name == "b3": + oup = 384 + elif name == "b4": + oup = 448 + elif name == "b5": + oup = 512 + elif name == "b6": + oup = 576 + elif name == "b7": + oup = 640 + self._conv = ConvBNLayer(oup, + 1, + output_channels, + bn_act="swish", + padding_type=self.padding_type, + name="", + conv_name="_conv_head", + bn_name="_bn1", + model_name=self.name, + cur_stage=7) + self._pool = AdaptiveAvgPool2D(1) + + if self._global_params.dropout_rate: + self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") + + # param_attr, bias_attr = init_fc_layer("_fc") + self._fc = Linear(output_channels, + class_num, + weight_attr=paddle.ParamAttr(name='image_trans_w'), + bias_attr=paddle.ParamAttr(name='image_trans_b')) + + def forward(self, inputs): + x = self._ef(inputs) + print('EfficientNet:') + print(x.shape) + print(self._conv) + x = self._conv(x) + x = self._pool(x) + if self._global_params.dropout_rate: + x = self._drop(x) + x = paddle.squeeze(x, axis=[2, 3]) + x = self._fc(x) + x = F.tanh(x) + return x + + +def _load_pretrained(pretrained, model, model_url, use_ssld=False): + if pretrained is False: + pass + elif isinstance(pretrained, str): + load_dygraph_pretrain(model, pretrained) + else: + raise RuntimeError("pretrained type is not available. 
Please use `string` type.") + + +def EfficientNetB0_small(padding_type='DYNAMIC', + override_params=None, + use_se=False, + pretrained=False, + use_ssld=False, + **kwargs): + model = EfficientNet(name='b0', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0_small"]) + return model + + +def EfficientNetB0(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b0', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB0"]) + return model + + +def EfficientNetB1(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b1', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB1"]) + return model + + +def EfficientNetB2(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b2', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB2"]) + return model + + +def EfficientNetB3(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b3', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB3"]) + return model + + +def EfficientNetB4(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b4', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB4"]) + return model + + +def EfficientNetB5(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b5', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB5"]) + return model + + +def EfficientNetB6(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b6', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB6"]) + return model + + +def EfficientNetB7(padding_type='SAME', override_params=None, use_se=True, pretrained=False, use_ssld=False, **kwargs): + model = EfficientNet(name='b7', padding_type=padding_type, override_params=override_params, use_se=use_se, **kwargs) + _load_pretrained(pretrained, model, MODEL_URLS["EfficientNetB7"]) + return model diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie2.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie2.py new file mode 100755 index 000000000..9267be83b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie2.py @@ -0,0 +1,381 @@ +# -*- coding: utf-8 -* +""" +ERNIE 网络结构 +""" +import logging +import re +import time + +import paddle +from paddle import nn +from paddle.nn import 
functional as F + +ACT_DICT = { + 'relu': nn.ReLU, + 'gelu': nn.GELU, +} + + +class ErnieModel(nn.Layer): + """ ernie model """ + + def __init__(self, cfg, name=''): + """ + Fundamental pretrained Ernie model + """ + nn.Layer.__init__(self) + self.cfg = cfg + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + # d_sent = cfg.get("sent_type_vocab_size", 4) or cfg.get('type_vocab_size', 4) + if cfg.get('sent_type_vocab_size'): + d_sent = cfg['sent_type_vocab_size'] + else: + d_sent = cfg.get('type_vocab_size', 2) + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + self.initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = nn.Embedding(d_vocab, + d_emb, + weight_attr=paddle.ParamAttr(name=append_name(name, 'word_embedding'), + initializer=self.initializer)) + self.pos_emb = nn.Embedding(d_pos, + d_emb, + weight_attr=paddle.ParamAttr(name=append_name(name, 'pos_embedding'), + initializer=self.initializer)) + # self.sent_emb = nn.Embedding( + # d_sent, + # d_emb, + # weight_attr=paddle.ParamAttr(name=append_name(name, 'sent_embedding'), initializer=self.initializer)) + self._use_sent_id = cfg.get('use_sent_id', True) + self._use_sent_id = False + if self._use_sent_id: + self.sent_emb = nn.Embedding(d_sent, + d_emb, + weight_attr=paddle.ParamAttr(name=append_name(name, 'sent_embedding'), + initializer=self.initializer)) + self._use_task_id = cfg.get('use_task_id', False) + self._use_task_id = False + if self._use_task_id: + self._task_types = cfg.get('task_type_vocab_size', 3) + logging.info('using task_id, #task_types:{}'.format(self._task_types)) + self.task_emb = nn.Embedding(self._task_types, + d_emb, + weight_attr=paddle.ParamAttr(name=append_name(name, 'task_embedding'), + initializer=self.initializer)) + + prob = cfg['hidden_dropout_prob'] + self.dropout = nn.Dropout(p=prob) + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + + if cfg.get('has_pooler', True): + self.pooler = _build_linear(cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), + self.initializer) + else: + self.pooler = None + + self.key_tag = None + self._checkpoints = [] + self.train() + + def get_checkpoints(self): + """return checkpoints for recomputing""" + # recompute checkpoints + return self._checkpoints + + # FIXME:remove this + def eval(self): + """ eval """ + if paddle.in_dynamic_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + return self + + def train(self): + """ train """ + if paddle.in_dynamic_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + return self + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + task_ids=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. 
+ if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. + input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. + task_ids(optional `Variable` of shape `[batch_size, seq_len]`): + task type for pre_train task type + attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + info (Dictionary): + addtional middle level info, inclues: all hidden stats, k/v caches. + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequence], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` specified; attn_bias must not be None' + d_seqlen = paddle.shape(src_ids)[1] + if pos_ids is None: + pos_ids = paddle.arange(0, d_seqlen, 1, dtype='int32').reshape([1, -1]).cast('int64') + + if attn_bias is None: + if input_mask is None: + input_mask = paddle.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = input_mask.unsqueeze(-1) + attn_bias = input_mask.matmul(input_mask, transpose_y=True) + if use_causal_mask: + sequence = paddle.reshape(paddle.arange(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = (sequence.matmul(1. / sequence, transpose_y=True) >= 1.).cast('float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + + attn_bias = (1. 
- attn_bias) * -10000.0 + attn_bias = attn_bias.unsqueeze(1).tile([1, self.n_head, 1, 1]) # avoid broadcast =_= + + if sent_ids is None: + sent_ids = paddle.zeros_like(src_ids) + + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + # sent_embedded = self.sent_emb(sent_ids) + # embedded = src_embedded + pos_embedded + sent_embedded + embedded = src_embedded + pos_embedded + if self._use_sent_id: + sent_embedded = self.sent_emb(sent_ids) + embedded = embedded + sent_embedded + if self._use_task_id: + task_embeded = self.task_emb(task_ids) + embedded = embedded + task_embeded + + self._checkpoints.append(embedded.name) + embedded = self.dropout(self.ln(embedded)) + + (encoded, hidden_list, cache_list, checkpoint_name) = self.encoder_stack(embedded, attn_bias, + past_cache=past_cache, \ + key_tag=self.key_tag) + + self._checkpoints.extend(checkpoint_name) + if self.pooler is not None: + pooled = F.tanh(self.pooler(encoded[:, 0, :])) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + return pooled, encoded + + +class ErnieEncoderStack(nn.Layer): + """ ernie encoder stack """ + + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = nn.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None, key_tag=None): + """ forward function """ + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + checkpoint_name = [] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p, key_tag=key_tag) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + checkpoint_name.append(inputs.name) + + return [inputs, hidden_list, (cache_list_k, cache_list_v), checkpoint_name] + + +class ErnieBlock(nn.Layer): + """ ernie block class """ + + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionWiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = nn.Dropout(p=prob) + + def forward(self, inputs, attn_bias=None, past_cache=None, key_tag=None): + """ forward """ + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache, + key_tag=key_tag) # self attention + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class AttentionLayer(nn.Layer): + """ attention layer """ + + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = 
cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + # assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + + self.n_head = n_head + self.d_key = d_model_q // n_head + + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.layer_num = int(re.findall(r"\d+", name)[0]) + # self.dropout = nn.Dropout(p=cfg['attention_probs_dropout_prob']) + self.dropout_prob = cfg['attention_probs_dropout_prob'] + self.dropout = nn.Dropout(p=self.dropout_prob) + + def forward(self, queries, keys, values, attn_bias, past_cache, key_tag=None): + """ layer forward function """ + assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + # bsz, q_len, q_dim = queries.shape + # bsz, k_len, k_dim = keys.shape + # bsz, v_len, v_dim = values.shape + # assert k_len == v_len + + q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = paddle.concat([cached_k, k], 1) + v = paddle.concat([cached_v, v], 1) + + # [batch, head, seq, dim] + q = q.reshape([0, 0, self.n_head, q.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) + # [batch, head, seq, dim] + k = k.reshape([0, 0, self.n_head, k.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) + # [batch, head, seq, dim] + v = v.reshape([0, 0, self.n_head, v.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) + q = q.scale(self.d_key**-0.5) + + score = q.matmul(k, transpose_y=True) + + if attn_bias is not None: + score += attn_bias + score = F.softmax(score) + score = self.dropout(score) + out = score.matmul(v) + + out = out.transpose([0, 2, 1, 3]) + out = out.reshape([0, 0, out.shape[2] * out.shape[3]]) + out = self.o(out) + + return out, cache + + +class PositionWiseFeedForwardLayer(nn.Layer): + """ post wise feed forward layer """ + + def __init__(self, cfg, name=None): + super(PositionWiseFeedForwardLayer, self).__init__() + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + + self.act = ACT_DICT[cfg['hidden_act']]() + self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) 
+ self.dropout = nn.Dropout(p=prob) + + def forward(self, inputs): + """ forward """ + hidden = self.act(self.i(inputs)) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +def _build_linear(n_in, n_out, name, init): + """ + """ + return nn.Linear(n_in, + n_out, + weight_attr=paddle.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None) + + +def _build_ln(n_in, name): + """ + """ + return nn.LayerNorm(normalized_shape=n_in, + weight_attr=paddle.ParamAttr(name='%s_layer_norm_scale' % name if name is not None else None, + initializer=nn.initializer.Constant(1.)), + bias_attr=paddle.ParamAttr(name='%s_layer_norm_bias' % name if name is not None else None, + initializer=nn.initializer.Constant(0.))) + + +def append_name(name, postfix): + """ append name with postfix """ + if name is None: + ret = None + elif name == '': + ret = postfix + else: + ret = '%s_%s' % (name, postfix) + return ret diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_modeling.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_modeling.py new file mode 100755 index 000000000..b35e30869 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_modeling.py @@ -0,0 +1,739 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import json +import logging +import math + +import six +if six.PY2: + from pathlib2 import Path +else: + from pathlib import Path +import numpy as np +import paddle as P +from paddle import nn +from paddle.nn import functional as F +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.file_utils import _fetch_from_remote, add_docstring + +log = logging.getLogger(__name__) + +ACT_DICT = { + 'relu': nn.ReLU, + 'gelu': nn.GELU, +} + + +def _get_rel_pos_bias(seq_len, max_len=128, num_buckets=32, bidirectional=True, reset=True): + #max_len = 520 + pos = np.array(range(seq_len)) + rel_pos = pos[:, None] - pos[None, :] + ret = 0 + n = -rel_pos + if bidirectional: + num_buckets //= 2 + ret += (n < 0).astype('int32') * num_buckets # mtf.to_int32(mtf.less(n, 0)) * num_buckets + n = np.abs(n) + else: + n = np.max(n, np.zeros_like(n)) + # now n is in the range [0, inf) + + # half of the buckets are for exact increments in positions + max_exact = num_buckets // 2 + is_small = n < max_exact + # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance + val_if_large = max_exact + (np.log(n.astype('float32') / max_exact) / math.log(max_len / max_exact) * + (num_buckets - max_exact)).astype('int32') + tmp = np.full_like(val_if_large, num_buckets - 1) + val_if_large = np.where(val_if_large < tmp, val_if_large, tmp) + + ret += np.where(is_small, n, val_if_large) + if reset: + num_buckets *= 2 + ret[:, 0] = num_buckets + ret[0, :] = num_buckets // 2 + + return np.array(ret).reshape([seq_len, seq_len]).astype("int64") + + +def _build_linear(n_in, n_out, name, init): + return nn.Linear( + n_in, + n_out, + weight_attr=P.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), + bias_attr='%s.b_0' % name if name is not None else None, + ) + + +def _build_ln(n_in, name): + return nn.LayerNorm( + normalized_shape=n_in, + weight_attr=P.ParamAttr(name='%s_layer_norm_scale' % name 
if name is not None else None, + initializer=nn.initializer.Constant(1.)), + bias_attr=P.ParamAttr(name='%s_layer_norm_bias' % name if name is not None else None, + initializer=nn.initializer.Constant(0.)), + ) + + +def append_name(name, postfix): + if name is None: + ret = None + elif name == '': + ret = postfix + else: + ret = '%s_%s' % (name, postfix) + return ret + + +class AttentionLayer(nn.Layer): + + def __init__(self, cfg, name=None): + super(AttentionLayer, self).__init__() + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = cfg['hidden_size'] + n_head = cfg['num_attention_heads'] + assert d_model % n_head == 0 + d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head + d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head + self.n_head = n_head + self.d_key = d_model_q // n_head + self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) + self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) + self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) + self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) + self.dropout = nn.Dropout(p=cfg['attention_probs_dropout_prob']) + + def forward(self, queries, keys, values, attn_bias, past_cache): + assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 + #bsz, q_len, q_dim = queries.shape + #bsz, k_len, k_dim = keys.shape + #bsz, v_len, v_dim = values.shape + #assert k_len == v_len + + q = self.q(queries) + k = self.k(keys) + v = self.v(values) + + cache = (k, v) + if past_cache is not None: + cached_k, cached_v = past_cache + k = P.concat([cached_k, k], 1) + v = P.concat([cached_v, v], 1) + + q = q.reshape([0, 0, self.n_head, q.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) #[batch, head, seq, dim] + k = k.reshape([0, 0, self.n_head, k.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) #[batch, head, seq, dim] + v = v.reshape([0, 0, self.n_head, v.shape[-1] // self.n_head]).transpose([0, 2, 1, 3]) #[batch, head, seq, dim] + + q = q.scale(self.d_key**-0.5) + score = q.matmul(k, transpose_y=True) + if attn_bias is not None: + score += attn_bias + score = F.softmax(score) + score = self.dropout(score) + + out = score.matmul(v).transpose([0, 2, 1, 3]) + out = out.reshape([0, 0, out.shape[2] * out.shape[3]]) + out = self.o(out) + return out, cache + + +class PositionwiseFeedForwardLayer(nn.Layer): + + def __init__(self, cfg, name=None): + super(PositionwiseFeedForwardLayer, self).__init__() + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_ffn = cfg.get('intermediate_size', 4 * d_model) + self.act = ACT_DICT[cfg['hidden_act']]() + self.i = _build_linear( + d_model, + d_ffn, + append_name(name, 'fc_0'), + initializer, + ) + self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) + prob = cfg.get('intermediate_dropout_prob', 0.) 
+ self.dropout = nn.Dropout(p=prob) + + def forward(self, inputs): + hidden = self.act(self.i(inputs)) + hidden = self.dropout(hidden) + out = self.o(hidden) + return out + + +class ErnieBlock(nn.Layer): + + def __init__(self, cfg, name=None): + super(ErnieBlock, self).__init__() + d_model = cfg['hidden_size'] + self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) + self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) + self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) + self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) + prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = nn.Dropout(p=prob) + + def forward(self, inputs, attn_bias=None, past_cache=None): + attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn + attn_out = self.dropout(attn_out) + hidden = attn_out + inputs + hidden = self.ln1(hidden) # dropout/ add/ norm + + ffn_out = self.ffn(hidden) + ffn_out = self.dropout(ffn_out) + hidden = ffn_out + hidden + hidden = self.ln2(hidden) + return hidden, cache + + +class ErnieEncoderStack(nn.Layer): + + def __init__(self, cfg, name=None): + super(ErnieEncoderStack, self).__init__() + n_layers = cfg['num_hidden_layers'] + self.block = nn.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) + + def forward(self, inputs, attn_bias=None, past_cache=None): + if past_cache is not None: + assert isinstance( + past_cache, + tuple), 'unknown type of `past_cache`, expect tuple or list. got %s' % repr(type(past_cache)) + past_cache = list(zip(*past_cache)) + else: + past_cache = [None] * len(self.block) + cache_list_k, cache_list_v, hidden_list = [], [], [inputs] + + for b, p in zip(self.block, past_cache): + inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) + cache_k, cache_v = cache + cache_list_k.append(cache_k) + cache_list_v.append(cache_v) + hidden_list.append(inputs) + + return inputs, hidden_list, (cache_list_k, cache_list_v) + + +class PretrainedModel(object): + bce = 'https://ernie-github.cdn.bcebos.com/' + resource_map = { + 'ernie-1.0': bce + 'model-ernie1.0.1.tar.gz', + 'ernie-2.0-en': bce + 'model-ernie2.0-en.1.tar.gz', + 'ernie-2.0-large-en': bce + 'model-ernie2.0-large-en.1.tar.gz', + 'ernie-tiny': bce + 'model-ernie_tiny.1.tar.gz', + 'ernie-gram-zh': bce + 'model-ernie-gram-zh.1.tar.gz', + 'ernie-gram-en': bce + 'model-ernie-gram-en.1.tar.gz', + } + + @classmethod + def from_pretrained(cls, pretrain_dir_or_url, force_download=False, **kwargs): + if not Path(pretrain_dir_or_url).exists() and str(pretrain_dir_or_url) in cls.resource_map: + url = cls.resource_map[str(pretrain_dir_or_url)] + log.info('get pretrain dir from %s' % url) + pretrain_dir = _fetch_from_remote(url, force_download) + else: + log.info('pretrain dir %s not in %s, read from local' % (pretrain_dir_or_url, repr(cls.resource_map))) + pretrain_dir = Path(pretrain_dir_or_url) + + if not pretrain_dir.exists(): + raise ValueError('pretrain dir not found: %s, optional: %s' % (pretrain_dir, cls.resource_map.keys())) + state_dict_path = pretrain_dir / 'saved_weights.pdparams' + config_path = pretrain_dir / 'ernie_config.json' + + if not config_path.exists(): + raise ValueError('config path not found: %s' % config_path) + name_prefix = kwargs.pop('name', None) + cfg_dict = dict(json.loads(config_path.open().read()), **kwargs) + model = cls(cfg_dict, name=name_prefix) + + log.info('loading pretrained model from %s' % 
pretrain_dir) + + #param_path = pretrain_dir / 'params' + #if os.path.exists(param_path): + # raise NotImplementedError() + # log.debug('load pretrained weight from program state') + # F.io.load_program_state(param_path) #buggy in dygraph.gurad, push paddle to fix + if state_dict_path.exists(): + m = P.load(str(state_dict_path)) + for k, v in model.state_dict().items(): + if k not in m: + log.warn('param:%s not set in pretrained model, skip' % k) + m[k] = v # FIXME: no need to do this in the future + model.set_state_dict(m) + else: + raise ValueError('weight file not found in pretrain dir: %s' % pretrain_dir) + return model + + +class ErnieModel(nn.Layer, PretrainedModel): + + def __init__(self, cfg, name=None): + """ + Fundamental pretrained Ernie model + """ + log.debug('init ErnieModel with config: %s' % repr(cfg)) + nn.Layer.__init__(self) + d_model = cfg['hidden_size'] + d_emb = cfg.get('emb_size', cfg['hidden_size']) + d_vocab = cfg['vocab_size'] + d_pos = cfg['max_position_embeddings'] + d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] + self.d_rel_pos = cfg.get('rel_pos_size', None) + max_seq_len = cfg.get("max_seq_len", 512) + self.n_head = cfg['num_attention_heads'] + self.return_additional_info = cfg.get('return_additional_info', False) + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + if self.d_rel_pos: + self.rel_pos_bias = _get_rel_pos_bias(max_seq_len) + + self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) + self.word_emb = nn.Embedding(d_vocab, + d_emb, + weight_attr=P.ParamAttr(name=append_name(name, 'word_embedding'), + initializer=initializer)) + self.pos_emb = nn.Embedding(d_pos, + d_emb, + weight_attr=P.ParamAttr(name=append_name(name, 'pos_embedding'), + initializer=initializer)) + self.sent_emb = nn.Embedding(d_sent, + d_emb, + weight_attr=P.ParamAttr(name=append_name(name, 'sent_embedding'), + initializer=initializer)) + if self.d_rel_pos: + self.rel_pos_bias_emb = nn.Embedding(self.d_rel_pos, + self.n_head, + weight_attr=P.ParamAttr(name=append_name(name, 'rel_pos_embedding'), + initializer=initializer)) + prob = cfg['hidden_dropout_prob'] + self.dropout = nn.Dropout(p=prob) + + self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) + if cfg.get('has_pooler', True): + self.pooler = _build_linear( + cfg['hidden_size'], + cfg['hidden_size'], + append_name(name, 'pooled_fc'), + initializer, + ) + else: + self.pooler = None + self.train() + + #FIXME:remove this + def eval(self): + if P.in_dynamic_mode(): + super(ErnieModel, self).eval() + self.training = False + for l in self.sublayers(): + l.training = False + return self + + def train(self): + if P.in_dynamic_mode(): + super(ErnieModel, self).train() + self.training = True + for l in self.sublayers(): + l.training = True + return self + + def forward(self, + src_ids, + sent_ids=None, + pos_ids=None, + input_mask=None, + attn_bias=None, + past_cache=None, + use_causal_mask=False): + """ + Args: + src_ids (`Variable` of shape `[batch_size, seq_len]`): + Indices of input sequence tokens in the vocabulary. + sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): + aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. + if None, assume all tokens come from `segment_a` + pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): + Indices of positions of each input sequence tokens in the position embeddings. 
+ input_mask(optional `Variable` of shape `[batch_size, seq_len]`): + Mask to avoid performing attention on the padding token indices of the encoder input. + attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): + 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask + past_cache(optional, tuple of two lists: cached key and cached value, + each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): + cached key/value tensor that will be concated to generated key/value when performing self attention. + if set, `attn_bias` should not be None. + Returns: + pooled (`Variable` of shape `[batch_size, hidden_size]`): + output logits of pooler classifier + encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): + output logits of transformer stack + info (Dictionary): + addtional middle level info, inclues: all hidden stats, k/v caches. + """ + assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) + assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' + d_seqlen = P.shape(src_ids)[1] + if pos_ids is None: + pos_ids = P.arange(0, d_seqlen, 1, dtype='int32').reshape([1, -1]).cast('int64') + if attn_bias is None: + if input_mask is None: + input_mask = P.cast(src_ids != 0, 'float32') + assert len(input_mask.shape) == 2 + input_mask = input_mask.unsqueeze(-1) + attn_bias = input_mask.matmul(input_mask, transpose_y=True) + if use_causal_mask: + sequence = P.reshape(P.arange(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) + causal_mask = (sequence.matmul(1. / sequence, transpose_y=True) >= 1.).cast('float32') + attn_bias *= causal_mask + else: + assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape + attn_bias = (1. 
- attn_bias) * -10000.0 + attn_bias = attn_bias.unsqueeze(1).tile([1, self.n_head, 1, 1]) # avoid broadcast =_= + attn_bias.stop_gradient = True + if sent_ids is None: + sent_ids = P.zeros_like(src_ids) + if self.d_rel_pos: + rel_pos_ids = self.rel_pos_bias[:d_seqlen, :d_seqlen] + rel_pos_ids = P.to_tensor(rel_pos_ids, dtype='int64') + rel_pos_bias = self.rel_pos_bias_emb(rel_pos_ids).transpose([2, 0, 1]) + attn_bias += rel_pos_bias + src_embedded = self.word_emb(src_ids) + pos_embedded = self.pos_emb(pos_ids) + sent_embedded = self.sent_emb(sent_ids) + embedded = src_embedded + pos_embedded + sent_embedded + + embedded = self.dropout(self.ln(embedded)) + + encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) + if self.pooler is not None: + pooled = F.tanh(self.pooler(encoded[:, 0, :])) + else: + pooled = None + + additional_info = { + 'hiddens': hidden_list, + 'caches': cache_list, + } + + if self.return_additional_info: + return pooled, encoded, additional_info + return pooled, encoded + + +class ErnieModelForSequenceClassification(ErnieModel): + """ + Ernie Model for text classfication or pointwise ranking tasks + """ + + def __init__(self, cfg, name=None): + super(ErnieModelForSequenceClassification, self).__init__(cfg, name=name) + + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + self.classifier = _build_linear(cfg['hidden_size'], cfg['num_labels'], append_name(name, 'cls'), initializer) + + prob = cfg.get('classifier_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = nn.Dropout(p=prob) + self.train() + + @add_docstring(ErnieModel.forward.__doc__) + def forward(self, *args, **kwargs): + """ + Args: + labels (optional, `Variable` of shape [batch_size]): + ground truth label id for each sentence + Returns: + loss (`Variable` of shape []): + Cross entropy loss mean over batch + if labels not set, returns None + logits (`Variable` of shape [batch_size, hidden_size]): + output logits of classifier + """ + labels = kwargs.pop('labels', None) + pooled, encoded = super(ErnieModelForSequenceClassification, self).forward(*args, **kwargs) + hidden = self.dropout(pooled) + logits = self.classifier(hidden) + + if labels is not None: + if len(labels.shape) != 1: + labels = labels.squeeze() + loss = F.cross_entropy(logits, labels) + else: + loss = None + return loss, logits + + +class ErnieModelForTokenClassification(ErnieModel): + """ + Ernie Model for Named entity tasks(NER) + """ + + def __init__(self, cfg, name=None): + super(ErnieModelForTokenClassification, self).__init__(cfg, name=name) + + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + self.classifier = _build_linear(cfg['hidden_size'], cfg['num_labels'], append_name(name, 'cls'), initializer) + + prob = cfg.get('classifier_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = nn.Dropout(p=prob) + self.train() + + @add_docstring(ErnieModel.forward.__doc__) + def forward(self, *args, **kwargs): + """ + Args: + labels (optional, `Variable` of shape [batch_size, seq_len]): + ground truth label id for each token + Returns: + loss (`Variable` of shape []): + Cross entropy loss mean over batch and time, ignore positions where label == -100 + if labels not set, returns None + logits (`Variable` of shape [batch_size, seq_len, hidden_size]): + output logits of classifier + loss_weights (`Variable` of shape [batch_size, seq_len]): + weigths of loss for each tokens. 
+ ignore_index (int): + when label == `ignore_index`, this token will not contribute to loss + """ + ignore_index = kwargs.pop('ignore_index', -100) + labels = kwargs.pop('labels', None) + loss_weights = kwargs.pop('loss_weights', None) + pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs) + hidden = self.dropout(encoded) # maybe not? + logits = self.classifier(hidden) + + if labels is not None: + if len(labels.shape) != 2: + labels = labels.squeeze() + loss = F.cross_entropy(logits, labels, ignore_index=ignore_index, reduction='none') + if loss_weights is not None: + loss = loss * loss_weights + loss = loss.mean() + else: + loss = None + return loss, logits + + +class ErnieModelForQuestionAnswering(ErnieModel): + """ + Ernie model for reading comprehension tasks (SQuAD) + """ + + def __init__(self, cfg, name=None): + super(ErnieModelForQuestionAnswering, self).__init__(cfg, name=name) + + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + self.classifier = _build_linear(cfg['hidden_size'], 2, append_name(name, 'cls_mrc'), initializer) + + prob = cfg.get('classifier_dropout_prob', cfg['hidden_dropout_prob']) + self.dropout = nn.Dropout(p=prob) + self.train() + + @add_docstring(ErnieModel.forward.__doc__) + def forward(self, *args, **kwargs): + """ + Args: + start_pos (optional, `Variable` of shape [batch_size]): + token index of start of answer span in `context` + end_pos (optional, `Variable` of shape [batch_size]): + token index of end of answer span in `context` + Returns: + loss (`Variable` of shape []): + Cross entropy loss mean over batch and time, ignore positions where label == -100 + if labels not set, returns None + start_logits (`Variable` of shape [batch_size, hidden_size]): + output logits of start position, use argmax(start_logit) to get start index + end_logits (`Variable` of shape [batch_size, hidden_size]): + output logits of end position, use argmax(end_logit) to get end index + """ + + start_pos = kwargs.pop('start_pos', None) + end_pos = kwargs.pop('end_pos', None) + pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs) + encoded = self.dropout(encoded) + encoded = self.classifier(encoded) + start_logit, end_logits = P.unstack(encoded, axis=-1) + if start_pos is not None and end_pos is not None: + if len(start_pos.shape) != 1: + start_pos = start_pos.squeeze() + if len(end_pos.shape) != 1: + end_pos = end_pos.squeeze() + start_loss = F.cross_entropy(start_logit, start_pos) + end_loss = F.cross_entropy(end_logits, end_pos) + loss = (start_loss.mean() + end_loss.mean()) / 2. 
+ else: + loss = None + return loss, start_logit, end_logits + + +class NSPHead(nn.Layer): + + def __init__(self, cfg, name=None): + super(NSPHead, self).__init__() + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + self.nsp = _build_linear(cfg['hidden_size'], 2, append_name(name, 'nsp_fc'), initializer) + + def forward(self, inputs, labels): + """ + Args: + start_pos (optional, `Variable` of shape [batch_size]): + token index of start of answer span in `context` + end_pos (optional, `Variable` of shape [batch_size]): + token index of end of answer span in `context` + Returns: + loss (`Variable` of shape []): + Cross entropy loss mean over batch and time, ignore positions where label == -100 + if labels not set, returns None + start_logits (`Variable` of shape [batch_size, hidden_size]): + output logits of start position + end_logits (`Variable` of shape [batch_size, hidden_size]): + output logits of end position + """ + + logits = self.nsp(inputs) + loss = F.cross_entropy(logits, labels) + return loss + + +class ErnieModelForPretraining(ErnieModel): + """ + Ernie Model for Masked Languate Model pretrain + """ + + def __init__(self, cfg, name=None): + super(ErnieModelForPretraining, self).__init__(cfg, name=name) + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.pooler_heads = nn.LayerList([NSPHead(cfg, name=name)]) + self.mlm = _build_linear( + d_model, + d_model, + append_name(name, 'mask_lm_trans_fc'), + initializer, + ) + self.act = ACT_DICT[cfg['hidden_act']]() + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = P.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=P.ParamAttr(name=append_name(name, 'mask_lm_out_fc.b_0'), + initializer=nn.initializer.Constant(value=0.0)), + is_bias=True, + ) + self.train() + + @add_docstring(ErnieModel.forward.__doc__) + def forward(self, *args, **kwargs): + """ + Args: + nsp_labels (optional, `Variable` of shape [batch_size]): + labels for `next sentence prediction` tasks + mlm_pos (optional, `Variable` of shape [n_mask, 2]): + index of mask_id in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)` + labels (optional, `Variable` of shape [n_mask]): + labels for `mask language model` tasks, the original token indices in masked position in `src_ids` + Returns: + loss (`Variable` of shape []): + total_loss of `next sentence prediction` and `masked language model` + mlm_loss (`Variable` of shape []): + loss for `masked language model` task + nsp_loss (`Variable` of shape []): + loss for `next sentence prediction` task + """ + + mlm_labels = kwargs.pop('labels') + mlm_pos = kwargs.pop('mlm_pos') + nsp_labels = kwargs.pop('nsp_labels') + pooled, encoded = super(ErnieModelForPretraining, self).forward(*args, **kwargs) + if len(mlm_labels.shape) != 1: + mlm_labels = mlm_labels.squeeze() + if len(nsp_labels.shape) == 1: + nsp_labels = nsp_labels.squeeze() + + nsp_loss = self.pooler_heads[0](pooled, nsp_labels) + + encoded_2d = encoded.gather_nd(mlm_pos) + encoded_2d = self.act(self.mlm(encoded_2d)) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = encoded_2d.matmul(self.word_emb.weight, transpose_y=True) + self.mlm_bias + mlm_loss = F.cross_entropy(logits_2d, mlm_labels) + total_loss = mlm_loss + nsp_loss + return total_loss, mlm_loss, nsp_loss + + +class ErnieModelForGeneration(ErnieModel): + """ + Ernie Model for sequence to sequence generation. 
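+
+    A minimal usage sketch (illustrative only; it assumes the ErnieTokenizer
+    class from ernie_tokenizer.py in this package and that `P` is the paddle
+    alias used throughout this file):
+
+        model = ErnieModelForGeneration.from_pretrained('ernie-gen-base-en')
+        tokenizer = ErnieTokenizer.from_pretrained('ernie-gen-base-en')
+        ids, _ = tokenizer.encode('some source text')
+        src_ids = P.to_tensor(ids, dtype='int64').unsqueeze(0)
+        # encode_only=True returns (None, None, info) without running the LM head
+        _, _, info = model(src_ids, encode_only=True)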
+ """ + resource_map = { + 'ernie-gen-base-en': ErnieModel.bce + 'model-ernie-gen-base-en.1.tar.gz', + 'ernie-gen-large-en': ErnieModel.bce + 'model-ernie-gen-large-en.1.tar.gz', + 'ernie-gen-large-430g-en': ErnieModel.bce + 'model-ernie-gen-large-430g-en.1.tar.gz', + 'ernie-1.0': ErnieModel.bce + 'model-ernie1.0.1.tar.gz', + } + + def __init__(self, cfg, name=None): + cfg['return_additional_info'] = True + cfg['has_pooler'] = False + super(ErnieModelForGeneration, self).__init__(cfg, name=name) + initializer = nn.initializer.TruncatedNormal(std=cfg['initializer_range']) + d_model = cfg['hidden_size'] + d_vocab = cfg['vocab_size'] + + self.mlm = _build_linear( + d_model, + d_model, + append_name(name, 'mask_lm_trans_fc'), + initializer, + ) + self.act = ACT_DICT[cfg['hidden_act']]() + self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) + self.mlm_bias = P.create_parameter( + dtype='float32', + shape=[d_vocab], + attr=P.ParamAttr(name=append_name(name, 'mask_lm_out_fc.b_0'), + initializer=nn.initializer.Constant(value=0.0)), + is_bias=True, + ) + self.train() + + @add_docstring(ErnieModel.forward.__doc__) + def forward(self, *args, **kwargs): + """ + Args + tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]): + ground trouth target sequence id (hard label) or distribution (soft label) + tgt_pos(`Variable` of shape [n_targets, 2]): + index of tgt_labels in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)` + encoder_only(Bool): + if set, will not return loss, logits_2d + Returns: + loss(`Variable` of shape []): + cross entropy loss mean over every target label. if `encode_only`, returns None. + logits(`Variable` of shape [n_targets, vocab_size]): + logits for every targets. if `encode_only`, returns None. 
+ info(Dictionary): see `ErnieModel` + """ + tgt_labels = kwargs.pop('tgt_labels', None) + tgt_pos = kwargs.pop('tgt_pos', None) + encode_only = kwargs.pop('encode_only', False) + _, encoded, info = ErnieModel.forward(self, *args, **kwargs) + if encode_only: + return None, None, info + if tgt_labels is None or tgt_pos is None: + encoded = self.act(self.mlm(encoded)) + encoded = self.mlm_ln(encoded) + logits = encoded.matmul(self.word_emb.weight, transpose_y=True) + self.mlm_bias + output_ids = logits.cast('float32').argmax(-1) + return output_ids, logits, info + else: + encoded_2d = encoded.gather_nd(tgt_pos) + encoded_2d = self.act(self.mlm(encoded_2d)) + encoded_2d = self.mlm_ln(encoded_2d) + logits_2d = encoded_2d.matmul(self.word_emb.weight, transpose_y=True) + self.mlm_bias + assert len(tgt_labels.shape) == 2, 'expect 2d label, got %r' % tgt_labels + + loss = F.cross_entropy(logits_2d, tgt_labels, soft_label=True) + return loss, logits_2d, info diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_tokenizer.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_tokenizer.py new file mode 100755 index 000000000..b6b290084 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/ernie_tokenizer.py @@ -0,0 +1,245 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import os +import re +import sys +import tempfile +from functools import partial +from pathlib import Path + +import six +if six.PY2: + from pathlib2 import Path +else: + from pathlib import Path + +from tqdm import tqdm +import numpy as np + +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.file_utils import _fetch_from_remote +import io + +open = partial(io.open, encoding='utf8') + +log = logging.getLogger(__name__) + +_max_input_chars_per_word = 100 + + +def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): + """ wordpiece: helloworld => [hello, ##world] """ + chars = list(token) + if len(chars) > _max_input_chars_per_word: + return [unk_token], [(0, len(chars))] + + is_bad = False + start = 0 + sub_tokens = [] + sub_pos = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0: + substr = sentencepiece_prefix + substr + if start > 0: + substr = prefix + substr + if substr in vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + sub_pos.append((start, end)) + start = end + if is_bad: + return [unk_token], [(0, len(chars))] + else: + return sub_tokens, sub_pos + + +class ErnieTokenizer(object): + bce = 'https://ernie-github.cdn.bcebos.com/' + resource_map = { + 'ernie-1.0': bce + 'model-ernie1.0.1.tar.gz', + 'ernie-2.0-en': bce + 'model-ernie2.0-en.1.tar.gz', + 'ernie-2.0-large-en': bce + 'model-ernie2.0-large-en.1.tar.gz', + 'ernie-tiny': bce + 'model-ernie_tiny.1.tar.gz', + 'ernie-gen-base-en': bce + 'model-ernie-gen-base-en.1.tar.gz', + 'ernie-gen-large-en': bce + 'model-ernie-gen-large-en.1.tar.gz', + 'ernie-gram-zh': bce + 'model-ernie-gram-zh.1.tar.gz', + 'ernie-gram-en': bce + 'model-ernie-gram-en.1.tar.gz', + } + + @classmethod + def from_pretrained(cls, pretrain_dir_or_url, force_download=False, **kwargs): + if not 
Path(pretrain_dir_or_url).exists() and str(pretrain_dir_or_url) in cls.resource_map: + url = cls.resource_map[str(pretrain_dir_or_url)] + log.info('get pretrain dir from %s' % url) + pretrain_dir = _fetch_from_remote(url, force_download=force_download) + else: + log.info('pretrain dir %s not in %s, read from local' % (pretrain_dir_or_url, repr(cls.resource_map))) + pretrain_dir = Path(pretrain_dir_or_url) + if not pretrain_dir.exists(): + raise ValueError('pretrain dir not found: %s, optional: %s' % (pretrain_dir, cls.resource_map.keys())) + vocab_path = pretrain_dir / 'vocab.txt' + if not vocab_path.exists(): + raise ValueError('no vocab file in pretrain dir: %s' % pretrain_dir) + vocab_dict = {j.strip().split('\t')[0]: i for i, j in enumerate(vocab_path.open(encoding='utf8').readlines())} + t = cls(vocab_dict, **kwargs) + return t + + def __init__(self, + vocab, + unk_token='[UNK]', + sep_token='[SEP]', + cls_token='[CLS]', + pad_token='[PAD]', + mask_token='[MASK]', + wordpiece_prefix='##', + sentencepiece_prefix='', + lower=True, + encoding='utf8', + special_token_list=[]): + if not isinstance(vocab, dict): + raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) + self.vocab = vocab + self.lower = lower + self.prefix = wordpiece_prefix + self.sentencepiece_prefix = sentencepiece_prefix + self.pad_id = self.vocab[pad_token] + self.cls_id = cls_token and self.vocab[cls_token] + self.sep_id = sep_token and self.vocab[sep_token] + self.unk_id = unk_token and self.vocab[unk_token] + self.mask_id = mask_token and self.vocab[mask_token] + self.unk_token = unk_token + special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) + pat_str = '' + for t in special_tokens: + if t is None: + continue + pat_str += '(%s)|' % re.escape(t) + pat_str += r'([a-zA-Z0-9]+|\S)' + log.debug('regex: %s' % pat_str) + self.pat = re.compile(pat_str) + self.encoding = encoding + + def tokenize(self, text): + if len(text) == 0: + return [] + if six.PY3 and not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if six.PY2 and isinstance(text, str): + text = text.decode(self.encoding) + + res = [] + for match in self.pat.finditer(text): + match_group = match.group(0) + if match.groups()[-1]: + if self.lower: + match_group = match_group.lower() + words, _ = _wordpiece(match_group, + vocab=self.vocab, + unk_token=self.unk_token, + prefix=self.prefix, + sentencepiece_prefix=self.sentencepiece_prefix) + else: + words = [match_group] + res += words + return res + + def convert_tokens_to_ids(self, tokens): + return [self.vocab.get(t, self.unk_id) for t in tokens] + + def truncate(self, id1, id2, seqlen): + len1 = len(id1) + len2 = len(id2) + half = seqlen // 2 + if len1 > len2: + len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) + else: + len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) + return id1[:len1_truncated], id2[:len2_truncated] + + def build_for_ernie(self, text_id, pair_id=[]): + """build sentence type id, add [CLS] [SEP]""" + text_id_type = np.zeros_like(text_id, dtype=np.int64) + ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) + + if len(pair_id): + pair_id_type = np.ones_like(pair_id, dtype=np.int64) + ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) + ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) + return ret_id, ret_id_type + + def encode(self, text, 
pair=None, truncate_to=None): + text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) + text_id_type = np.zeros_like(text_id, dtype=np.int64) + if pair is not None: + pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) + else: + pair_id = [] + if truncate_to is not None: + text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) + + ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) + return ret_id, ret_id_type + + +class ErnieTinyTokenizer(ErnieTokenizer): + bce = 'https://ernie-github.cdn.bcebos.com/' + resource_map = {'ernie-tiny': bce + 'model-ernie_tiny.1.tar.gz'} + + @classmethod + def from_pretrained(cls, pretrain_dir_or_url, force_download=False, **kwargs): + if not Path(pretrain_dir_or_url).exists() and str(pretrain_dir_or_url) in cls.resource_map: + url = cls.resource_map[str(pretrain_dir_or_url)] + log.info('get pretrain dir from %s' % url) + pretrain_dir = _fetch_from_remote(url, force_download) + else: + log.info('pretrain dir %s not in %s, read from local' % (pretrain_dir_or_url, repr(cls.resource_map))) + pretrain_dir = Path(pretrain_dir_or_url) + if not pretrain_dir.exists(): + raise ValueError('pretrain dir not found: %s' % pretrain_dir) + vocab_path = pretrain_dir / 'vocab.txt' + sp_model_path = pretrain_dir / 'subword/spm_cased_simp_sampled.model' + + if not vocab_path.exists(): + raise ValueError('no vocab file in pretrain dir: %s' % pretrain_dir) + vocab_dict = {j.strip().split('\t')[0]: i for i, j in enumerate(vocab_path.open(encoding='utf8').readlines())} + + t = cls(vocab_dict, sp_model_path, **kwargs) + return t + + def __init__(self, vocab, sp_model_path, **kwargs): + super(ErnieTinyTokenizer, self).__init__(vocab, **kwargs) + import sentencepiece as spm + import jieba as jb + self.sp_model = spm.SentencePieceProcessor() + self.window_size = 5 + self.sp_model.Load(sp_model_path) + self.jb = jb + + def cut(self, sentence): + return self.jb.cut(sentence) + + def tokenize(self, text): + if len(text) == 0: + return [] + if not isinstance(text, six.string_types): + text = text.decode(self.encoding) + if self.lower: + text = text.lower() + + res = [] + for match in self.cut(text): + res += self.sp_model.EncodeAsPieces(match) + return res diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py new file mode 100755 index 000000000..bead1f2c7 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py @@ -0,0 +1,63 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import logging +import os +import time +from pathlib import Path + +import six +from tqdm import tqdm +if six.PY2: + from pathlib2 import Path +else: + from pathlib import Path + +log = logging.getLogger(__name__) + + +def _fetch_from_remote(url, force_download=False, cached_dir='~/.paddle-ernie-cache'): + import hashlib, tempfile, requests, tarfile + sig = hashlib.md5(url.encode('utf8')).hexdigest() + cached_dir = Path(cached_dir).expanduser() + try: + cached_dir.mkdir() + except OSError: + pass + cached_dir_model = cached_dir / sig + from filelock import FileLock + with FileLock(str(cached_dir_model) + '.lock'): + donefile = cached_dir_model / 'done' + if (not 
force_download) and donefile.exists(): + log.debug('%s cached in %s' % (url, cached_dir_model)) + return cached_dir_model + cached_dir_model.mkdir(exist_ok=True) + tmpfile = cached_dir_model / 'tmp' + with tmpfile.open('wb') as f: + r = requests.get(url, stream=True) + total_len = int(r.headers.get('content-length')) + for chunk in tqdm(r.iter_content(chunk_size=1024), + total=total_len // 1024, + desc='downloading %s' % url, + unit='KB'): + if chunk: + f.write(chunk) + f.flush() + log.debug('extacting... to %s' % tmpfile) + with tarfile.open(tmpfile.as_posix()) as tf: + tf.extractall(path=str(cached_dir_model)) + donefile.touch() + os.remove(tmpfile.as_posix()) + + return cached_dir_model + + +def add_docstring(doc): + + def func(f): + f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) + return f + + return func diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/multimodal.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/multimodal.py new file mode 100755 index 000000000..63dd67ce3 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/multimodal.py @@ -0,0 +1,39 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class MultiModalModel(nn.Layer): + + def __init__(self, image_model=None, text_model=None, args=None): + super(MultiModalModel, self).__init__() + self.visual = image_model + self.text_model = text_model + + def encode_text(self, input_ids, pos_ids=None): + pool_out, text_embedding = self.text_model(input_ids, pos_ids=pos_ids) + return pool_out + + def encode_image(self, img_word): + img_embedding = self.visual(img_word) + return img_embedding[:, 0] + + def forward(self, img_word=None, input_ids=None, pos_ids=None): + img_embedding = self.visual(img_word) + img_embedding = img_embedding[:, 0] + pool_out, text_embedding = self.text_model(input_ids, pos_ids=pos_ids) + return img_embedding, pool_out diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/paddle_vision_transformer.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/paddle_vision_transformer.py new file mode 100755 index 000000000..4f1618cf0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/paddle_vision_transformer.py @@ -0,0 +1,444 @@ +# Copyright (c) 2021 PPViT Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Implement Transformer Class for ViT +""" +import copy + +import paddle +import paddle.nn as nn +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.droppath import DropPath + + +class Identity(nn.Layer): + """ Identity layer + The output of this layer is the input without any change. + Use this layer to avoid using 'if' condition in forward methods + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class PatchEmbedding(nn.Layer): + """Patch Embedding and Position Embedding + Apply patch embedding and position embedding on input images. + Attributes: + patch_embddings: impl using a patch_size x patch_size Conv2D operation + position_embddings: a parameter with len = num_patch + 1(for cls_token) + cls_token: token insert to the patch feature for classification + dropout: dropout for embeddings + """ + + def __init__(self, image_size=224, patch_size=16, in_channels=3, embed_dim=768, dropout=0.): + super().__init__() + n_patches = (image_size // patch_size) * (image_size // patch_size) + + self.patch_embedding = nn.Conv2D(in_channels=in_channels, + out_channels=embed_dim, + kernel_size=patch_size, + stride=patch_size) + + self.position_embeddings = paddle.create_parameter( + shape=[1, n_patches + 1, embed_dim], + dtype='float32', + default_initializer=paddle.nn.initializer.TruncatedNormal(std=.02)) + + self.cls_token = paddle.create_parameter(shape=[1, 1, embed_dim], + dtype='float32', + default_initializer=paddle.nn.initializer.Constant(0)) + + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + cls_tokens = self.cls_token.expand((x.shape[0], -1, -1)) + x = self.patch_embedding(x) + x = x.flatten(2) + x = x.transpose([0, 2, 1]) + x = paddle.concat((cls_tokens, x), axis=1) + + embeddings = x + self.position_embeddings # tensor broadcast + embeddings = self.dropout(embeddings) + return embeddings + + +class Attention(nn.Layer): + """ Attention module + Attention module for ViT, here q, k, v are assumed the same. + The qkv mappings are stored as one single param. 
+ Attributes: + num_heads: number of heads + attn_head_size: feature dim of single head + all_head_size: feature dim of all heads + qkv: a nn.Linear for q, k, v mapping + scales: 1 / sqrt(single_head_feature_dim) + out: projection of multi-head attention + attn_dropout: dropout for attention + proj_dropout: final dropout before output + softmax: softmax op for attention + """ + + def __init__(self, embed_dim, num_heads, attn_head_size=None, qkv_bias=True, dropout=0., attention_dropout=0.): + super().__init__() + + assert isinstance(embed_dim, + int), (f"Expected the type of `embed_dim` to be {int}, but received {type(embed_dim)}.") + assert isinstance(num_heads, + int), (f"Expected the type of `num_heads` to be {int}, but received {type(num_heads)}.") + + assert embed_dim > 0, (f"Expected `embed_dim` to be greater than 0, but received {embed_dim}") + assert num_heads > 0, (f"Expected `num_heads` to be greater than 0, but received {num_heads}") + + self.embed_dim = embed_dim + self.num_heads = num_heads + + if attn_head_size is not None: + assert isinstance(attn_head_size, int), (f"Expected the type of `attn_head_size` to be {int}, " + f"but received {type(attn_head_size)}.") + assert attn_head_size > 0, f"Expected `attn_head_size` to be greater than 0," \ + f" but received {attn_head_size}." + self.attn_head_size = attn_head_size + else: + self.attn_head_size = embed_dim // num_heads + assert self.attn_head_size * num_heads == embed_dim, ( + f"`embed_dim` must be divisible by `num_heads`," + f" but received embed_dim={embed_dim}, num_heads={num_heads}.") + + self.all_head_size = self.attn_head_size * num_heads + + w_attr_1, b_attr_1 = self._init_weights() + self.qkv = nn.Linear( + embed_dim, + self.all_head_size * 3, # weights for q, k, and v + weight_attr=w_attr_1, + bias_attr=b_attr_1 if qkv_bias else False) + + self.scales = self.attn_head_size**-0.5 + + w_attr_2, b_attr_2 = self._init_weights() + self.out = nn.Linear(self.all_head_size, embed_dim, weight_attr=w_attr_2, bias_attr=b_attr_2) + + self.attn_dropout = nn.Dropout(attention_dropout) + self.proj_dropout = nn.Dropout(dropout) + self.softmax = nn.Softmax(axis=-1) + + def _init_weights(self): + weight_attr = paddle.ParamAttr(initializer=nn.initializer.TruncatedNormal(std=.02)) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.Constant(0.0)) + return weight_attr, bias_attr + + def transpose_multihead(self, x): + new_shape = x.shape[:-1] + [self.num_heads, self.attn_head_size] + x = x.reshape(new_shape) + x = x.transpose([0, 2, 1, 3]) + return x + + def forward(self, x): + qkv = self.qkv(x).chunk(3, axis=-1) + q, k, v = map(self.transpose_multihead, qkv) + + attn = paddle.matmul(q, k, transpose_y=True) + attn = attn * self.scales + attn = self.softmax(attn) + attn = self.attn_dropout(attn) + + z = paddle.matmul(attn, v) + z = z.transpose([0, 2, 1, 3]) + new_shape = z.shape[:-2] + [self.all_head_size] + z = z.reshape(new_shape) + # reshape + z = self.out(z) + z = self.proj_dropout(z) + return z + + +class Mlp(nn.Layer): + """ MLP module + Impl using nn.Linear and activation is GELU, dropout is applied. 
+ Ops: fc -> act -> dropout -> fc -> dropout + Attributes: + fc1: nn.Linear + fc2: nn.Linear + act: GELU + dropout1: dropout after fc1 + dropout2: dropout after fc2 + """ + + def __init__(self, embed_dim, mlp_ratio, dropout=0.): + super().__init__() + w_attr_1, b_attr_1 = self._init_weights() + self.fc1 = nn.Linear(embed_dim, int(embed_dim * mlp_ratio), weight_attr=w_attr_1, bias_attr=b_attr_1) + + w_attr_2, b_attr_2 = self._init_weights() + self.fc2 = nn.Linear(int(embed_dim * mlp_ratio), embed_dim, weight_attr=w_attr_2, bias_attr=b_attr_2) + self.act = nn.GELU() + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + def _init_weights(self): + weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=0.2)) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.0)) + return weight_attr, bias_attr + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.dropout1(x) + x = self.fc2(x) + x = self.dropout2(x) + return x + + +class EncoderLayer(nn.Layer): + """Encoder Layer + Encoder layer contains attention, norm, mlp and residual + Attributes: + hidden_size: transformer feature dim + attn_norm: nn.LayerNorm before attention + mlp_norm: nn.LayerNorm before mlp + mlp: mlp modual + attn: attention modual + """ + + def __init__(self, + embed_dim, + num_heads, + attn_head_size=None, + qkv_bias=True, + mlp_ratio=4., + dropout=0., + attention_dropout=0., + droppath=0.): + super().__init__() + w_attr_1, b_attr_1 = self._init_weights() + self.attn_norm = nn.LayerNorm(embed_dim, weight_attr=w_attr_1, bias_attr=b_attr_1, epsilon=1e-6) + + self.attn = Attention(embed_dim, num_heads, attn_head_size, qkv_bias, dropout, attention_dropout) + self.drop_path = DropPath(droppath) if droppath > 0. else Identity() + + w_attr_2, b_attr_2 = self._init_weights() + self.mlp_norm = nn.LayerNorm(embed_dim, weight_attr=w_attr_2, bias_attr=b_attr_2, epsilon=1e-6) + + self.mlp = Mlp(embed_dim, mlp_ratio, dropout) + + def _init_weights(self): + weight_attr = paddle.ParamAttr(initializer=nn.initializer.Constant(1.0)) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.Constant(0.0)) + return weight_attr, bias_attr + + def forward(self, x): + h = x + x = self.attn_norm(x) + x = self.attn(x) + x = self.drop_path(x) + x = x + h + + h = x + x = self.mlp_norm(x) + x = self.mlp(x) + x = self.drop_path(x) + x = x + h + + return x + + +class Encoder(nn.Layer): + """Transformer encoder + Encoder encoder contains a list of EncoderLayer, and a LayerNorm. 
+ Attributes: + layers: nn.LayerList contains multiple EncoderLayers + encoder_norm: nn.LayerNorm which is applied after last encoder layer + """ + + def __init__(self, + embed_dim, + num_heads, + depth, + attn_head_size=None, + qkv_bias=True, + mlp_ratio=4.0, + dropout=0., + attention_dropout=0., + droppath=0.): + super().__init__() + # stochatic depth decay + depth_decay = [x.item() for x in paddle.linspace(0, droppath, depth)] + layer_list = [] + for i in range(depth): + encoder_layer = EncoderLayer(embed_dim, + num_heads, + attn_head_size=attn_head_size, + qkv_bias=qkv_bias, + mlp_ratio=mlp_ratio, + dropout=dropout, + attention_dropout=attention_dropout, + droppath=depth_decay[i]) + layer_list.append(copy.deepcopy(encoder_layer)) + self.layers = nn.LayerList(layer_list) + + w_attr_1, b_attr_1 = self._init_weights() + self.encoder_norm = nn.LayerNorm(embed_dim, weight_attr=w_attr_1, bias_attr=b_attr_1, epsilon=1e-6) + + def _init_weights(self): + weight_attr = paddle.ParamAttr(initializer=nn.initializer.Constant(1.0)) + bias_attr = paddle.ParamAttr(initializer=nn.initializer.Constant(0.0)) + return weight_attr, bias_attr + + def forward(self, x): + for layer in self.layers: + x = layer(x) + out = self.encoder_norm(x) + return out + + +class VisualTransformer(nn.Layer): + """ViT transformer + ViT Transformer, classifier is a single Linear layer for finetune, + For training from scratch, two layer mlp should be used. + Classification is done using cls_token. + Args: + image_size: int, input image size, default: 224 + patch_size: int, patch size, default: 16 + in_channels: int, input image channels, default: 3 + num_classes: int, number of classes for classification, default: 1000 + embed_dim: int, embedding dimension (patch embed out dim), default: 768 + depth: int, number ot transformer blocks, default: 12 + num_heads: int, number of attention heads, default: 12 + mlp_ratio: float, ratio of mlp hidden dim to embed dim(mlp in dim), default: 4.0 + qkv_bias: bool, If True, enable qkv(nn.Linear) layer with bias, default: True + dropout: float, dropout rate for linear layers, default: 0. + attention_dropout: float, dropout rate for attention layers default: 0. + droppath: float, droppath rate for droppath layers, default: 0. 
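+
+    Example (illustrative sketch; the values below are simply the ViT-B/16
+    defaults of this class, not settings read from any config file):
+
+        model = VisualTransformer(image_size=224, patch_size=16, embed_dim=768,
+                                  depth=12, num_heads=12)
+        x = paddle.rand([1, 3, 224, 224])
+        logits = model(x)  # taken from the cls_token, shape [1, num_classes]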
+ """ + + def __init__(self, + image_size=224, + patch_size=16, + in_channels=3, + num_classes=768, + embed_dim=768, + depth=12, + num_heads=12, + attn_head_size=None, + mlp_ratio=4, + qkv_bias=True, + dropout=0., + attention_dropout=0., + droppath=0., + train_from_scratch=False): + super().__init__() + # create patch embedding with positional embedding + self.patch_embedding = PatchEmbedding(image_size, patch_size, in_channels, embed_dim, dropout) + # create multi head self-attention layers + self.encoder = Encoder(embed_dim, num_heads, depth, attn_head_size, qkv_bias, mlp_ratio, dropout, + attention_dropout, droppath) + + # classifier head (for training from scracth) + if train_from_scratch: + w_attr_1, b_attr_1 = self._init_weights() + w_attr_2, b_attr_2 = self._init_weights() + self.classifier = nn.Sequential( + nn.Linear(embed_dim, embed_dim, weight_attr=w_attr_1, bias_attr=b_attr_1), + nn.ReLU(), + nn.Dropout(dropout), + nn.Linear(embed_dim, num_classes, weight_attr=w_attr_2, bias_attr=b_attr_2), + nn.Dropout(dropout), + ) + else: + # classifier head (for finetuning) + w_attr_1, b_attr_1 = self._init_weights() + self.classifier = nn.Linear(embed_dim, num_classes, weight_attr=w_attr_1, bias_attr=b_attr_1) + + def _init_weights(self): + weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.TruncatedNormal(std=.02)) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Constant(0.0)) + return weight_attr, bias_attr + + def forward(self, x): + x = self.patch_embedding(x) + x = self.encoder(x) + logits = self.classifier(x[:, 0]) # take only cls_token as classifier + return logits + + # return x + + +def build_vit(config): + """build vit model from config""" + model = VisualTransformer(image_size=config.DATA.IMAGE_SIZE, + patch_size=config.MODEL.TRANS.PATCH_SIZE, + in_channels=config.DATA.IMAGE_CHANNELS, + num_classes=config.MODEL.NUM_CLASSES, + embed_dim=config.MODEL.TRANS.EMBED_DIM, + depth=config.MODEL.TRANS.DEPTH, + num_heads=config.MODEL.TRANS.NUM_HEADS, + attn_head_size=config.MODEL.TRANS.ATTN_HEAD_SIZE, + mlp_ratio=config.MODEL.TRANS.MLP_RATIO, + qkv_bias=config.MODEL.TRANS.QKV_BIAS, + dropout=config.MODEL.DROPOUT, + attention_dropout=config.MODEL.ATTENTION_DROPOUT, + droppath=config.MODEL.DROPPATH, + train_from_scratch=False) + return model + + +def ViT_large_patch16_384(**kwargs): + model = VisualTransformer(image_size=384, + patch_size=16, + in_channels=3, + embed_dim=1024, + depth=24, + num_heads=16, + attn_head_size=64, + mlp_ratio=4.0, + qkv_bias=True, + dropout=0.1, + attention_dropout=0.1, + train_from_scratch=False) + return model + + +def ViT_large_patch16_224(**kwargs): + model = VisualTransformer(image_size=224, + patch_size=16, + in_channels=3, + embed_dim=1024, + depth=24, + num_heads=16, + attn_head_size=64, + mlp_ratio=4.0, + qkv_bias=True, + dropout=0.1, + attention_dropout=0.1, + train_from_scratch=False) + return model + + +def ViT_base_patch16_224(**kwargs): + model = VisualTransformer(image_size=224, + patch_size=16, + in_channels=3, + embed_dim=768, + depth=12, + num_heads=12, + attn_head_size=64, + mlp_ratio=4.0, + qkv_bias=True, + dropout=0, + attention_dropout=0, + train_from_scratch=False) + return model diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/resnet.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/resnet.py new file mode 100755 index 000000000..a8588d6bd --- /dev/null +++ 
b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/resnet.py @@ -0,0 +1,445 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +import paddle +import paddle.nn as nn +from paddle.utils.download import get_weights_path_from_url + +__all__ = [] + +model_urls = { + 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', 'cf548f46534aa3560945be4b95cd11c4'), + 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', '8d2275cf8706028345f78ac0e1d31969'), + 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', 'ca6f485ee1ab0492d38f323885b0ad80'), + 'resnet101': ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', '02f35f034ca3858e1e54d4036443c92d'), + 'resnet152': ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', '7ad16a2f1e7333859ff986138630fd7a'), + 'wide_resnet50_2': + ('https://paddle-hapi.bj.bcebos.com/models/wide_resnet50_2.pdparams', '0282f804d73debdab289bd9fea3fa6dc'), + 'wide_resnet101_2': + ('https://paddle-hapi.bj.bcebos.com/models/wide_resnet101_2.pdparams', 'd4360a2d23657f059216f5d5a1a9ac93'), +} + + +class BasicBlock(nn.Layer): + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2D + + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + + self.conv1 = nn.Conv2D(inplanes, planes, 3, padding=1, stride=stride, bias_attr=False) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BottleneckBlock(nn.Layer): + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): + super(BottleneckBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2D + width = int(planes * (base_width / 64.)) * groups + + self.conv1 = nn.Conv2D(inplanes, width, 1, bias_attr=False) + self.bn1 = norm_layer(width) + + self.conv2 = nn.Conv2D(width, + width, + 3, + padding=dilation, + stride=stride, + groups=groups, + dilation=dilation, + bias_attr=False) + self.bn2 = norm_layer(width) + + self.conv3 = nn.Conv2D(width, planes * self.expansion, 1, bias_attr=False) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride + + def 
forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNet(nn.Layer): + """ResNet model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + Block (BasicBlock|BottleneckBlock): block module of model. + depth (int): layers of resnet, default: 50. + width (int): base width of resnet, default: 64. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import ResNet + from paddle.vision.models.resnet import BottleneckBlock, BasicBlock + + resnet50 = ResNet(BottleneckBlock, 50) + + wide_resnet50_2 = ResNet(BottleneckBlock, 50, width=64*2) + + resnet18 = ResNet(BasicBlock, 18) + + x = paddle.rand([1, 3, 224, 224]) + out = resnet18(x) + + print(out.shape) + + """ + + def __init__(self, block, depth=50, width=64, num_classes=1000, with_pool=True): + super(ResNet, self).__init__() + layer_cfg = {18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]} + layers = layer_cfg[depth] + self.groups = 1 + self.base_width = width + self.num_classes = num_classes + self.with_pool = with_pool + self._norm_layer = nn.BatchNorm2D + + self.inplanes = 64 + self.dilation = 1 + + self.conv1 = nn.Conv2D(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias_attr=False) + self.bn1 = self._norm_layer(self.inplanes) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + if with_pool: + self.avgpool = nn.AdaptiveAvgPool2D((1, 1)) + + if num_classes > 0: + self.fc = nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2D(self.inplanes, planes * block.expansion, 1, stride=stride, bias_attr=False), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, + norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + if self.with_pool: + x = self.avgpool(x) + + if self.num_classes > 0: + x = paddle.flatten(x, 1) + x = self.fc(x) + + return x + + +def _resnet(arch, Block, depth, pretrained, **kwargs): + model = ResNet(Block, depth, **kwargs) + if pretrained: + assert arch 
in model_urls, "{} model do not have a pretrained model now, you should set pretrained=False".format( + arch) + weight_path = get_weights_path_from_url(model_urls[arch][0], model_urls[arch][1]) + + param = paddle.load(weight_path) + model.set_dict(param) + + return model + + +def resnet18(pretrained=False, **kwargs): + """ResNet 18-layer model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import resnet18 + + # build model + model = resnet18() + + # build model and load imagenet pretrained weight + # model = resnet18(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs) + + +def resnet34(pretrained=False, **kwargs): + """ResNet 34-layer model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import resnet34 + + # build model + model = resnet34() + + # build model and load imagenet pretrained weight + # model = resnet34(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs) + + +def resnet50(pretrained=False, **kwargs): + """ResNet 50-layer model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import resnet50 + + # build model + model = resnet50() + + # build model and load imagenet pretrained weight + # model = resnet50(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + return _resnet('resnet50', BottleneckBlock, 50, pretrained, **kwargs) + + +def resnet101(pretrained=False, **kwargs): + """ResNet 101-layer model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import resnet101 + + # build model + model = resnet101() + + # build model and load imagenet pretrained weight + # model = resnet101(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs) + + +def resnet152(pretrained=False, **kwargs): + """ResNet 152-layer model from + `"Deep Residual Learning for Image Recognition" `_ + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import resnet152 + + # build model + model = resnet152() + + # build model and load imagenet pretrained weight + # model = resnet152(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs) + + +def wide_resnet50_2(pretrained=False, **kwargs): + """Wide ResNet-50-2 model from + `"Wide Residual Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. 
code-block:: python + + import paddle + from paddle.vision.models import wide_resnet50_2 + + # build model + model = wide_resnet50_2() + + # build model and load imagenet pretrained weight + # model = wide_resnet50_2(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + kwargs['width'] = 64 * 2 + return _resnet('wide_resnet50_2', BottleneckBlock, 50, pretrained, **kwargs) + + +def wide_resnet101_2(pretrained=False, **kwargs): + """Wide ResNet-101-2 model from + `"Wide Residual Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + + Examples: + .. code-block:: python + + import paddle + from paddle.vision.models import wide_resnet101_2 + + # build model + model = wide_resnet101_2() + + # build model and load imagenet pretrained weight + # model = wide_resnet101_2(pretrained=True) + + x = paddle.rand([1, 3, 224, 224]) + out = model(x) + + print(out.shape) + """ + kwargs['width'] = 64 * 2 + return _resnet('wide_resnet101_2', BottleneckBlock, 101, pretrained, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/__init__.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/tokenizer.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/tokenizer.py new file mode 100755 index 000000000..759ca89f2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/tokenizer.py @@ -0,0 +1,355 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
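+# A minimal usage sketch of the tokenizers below (illustrative only; 'vocab.txt'
+# is an assumed vocabulary file with one `token` or `token\tindex` entry per line):
+#
+#     tokenizer = FullTokenizer(vocab_file='vocab.txt', do_lower_case=True)
+#     tokens = tokenizer.tokenize(u"unaffable")       # -> ["un", "##aff", "##able"]
+#     ids = tokenizer.convert_tokens_to_ids(tokens)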
+"""Tokenization classes.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import unicodedata + +import six +#import sentencepiece as sp + + +def convert_to_unicode(text): + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + fin = open(vocab_file) + for num, line in enumerate(fin): + items = convert_to_unicode(line.strip()).split("\t") + if len(items) > 2: + break + token = items[0] + index = items[1] if len(items) == 2 else num + token = token.strip() + vocab[token] = int(index) + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids_include_unk(vocab, tokens, unk_token="[UNK]"): + output = [] + for token in tokens: + if token in vocab: + output.append(vocab[token]) + else: + output.append(vocab[unk_token]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a peice of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class CharTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in text.lower().split(" "): + for sub_token in self.tokenizer.tokenize(token): + split_tokens.append(sub_token) + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, 
do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). + text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. 
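+        # The hex ranges checked below are, in order: CJK Unified Ideographs, Extension A,
+        # Extensions B-E in the supplementary planes, and the CJK Compatibility Ideographs
+        # block plus its supplement.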
+ if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Runs WordPiece tokenziation.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. + """ + + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start > 0: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. 
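+    # The four ASCII ranges below are !-/ (33-47), :-@ (58-64), [-` (91-96) and {-~ (123-126).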
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py new file mode 100755 index 000000000..e603c341d --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py @@ -0,0 +1,75 @@ +import json +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.clip_vision_transformer import ViT_base_patch16_224 +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.clip_vision_transformer import ViT_base_patch32_224 +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.clip_vision_transformer import ViT_large_patch14_224 +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.efficientnet import EfficientNetB5 +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.ernie2 import ErnieModel +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.transformers.multimodal import MultiModalModel +from disco_diffusion_ernievil_base.vit_b_16x.ernievil2.utils.tokenizer import FullTokenizer + +__all__ = ['tokenize', 'build_model'] + +MODEL_NAMES = ['vit_b_16x'] + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = FullTokenizer(vocab_file=os.path.join(os.path.dirname(__file__), + '../../packages/ernie_base_3.0/vocab.txt'), + do_lower_case=True) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 64): + """ + Returns the tokenized representation of given input string(s) + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + context_length : int + The context length to use; all baseline models use 24 as the context length + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + all_tokens = [] + for text in texts: + all_tokens.append([_tokenizer.vocab['[CLS]']] + + _tokenizer.convert_tokens_to_ids(_tokenizer.tokenize(text))[:context_length - 2] + + [_tokenizer.vocab['[SEP]']]) + + result = paddle.zeros([len(all_tokens), context_length], dtype='int64') + + for i, tokens in enumerate(all_tokens): + assert len(tokens) <= context_length + result[i, :len(tokens)] = paddle.to_tensor(tokens) + + return result + + +def build_model(name='vit_b_16x'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'vit_b_16x': build_vit_b_16x_model} + model = name2model[name]() + return model + + +def build_vit_b_16x_model(): + # Define model + image_model = ViT_base_patch16_224() + with open(os.path.join(os.path.dirname(__file__), + '../../packages/ernie_base_3.0/ernie_config.base.json')) as json_file: + config_dict = json.load(json_file) + text_model = ErnieModel(config_dict) + model = MultiModalModel(image_model, text_model) + checkpoint = paddle.load(os.path.join(os.path.dirname(__file__), '../../pre_trained/vit_b_16x.pdparams')) + model.set_state_dict(checkpoint) + model.eval() + return model diff --git 
a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/configs/vit_ernie_base.yaml b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/configs/vit_ernie_base.yaml new file mode 100755 index 000000000..0a3da1057 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/configs/vit_ernie_base.yaml @@ -0,0 +1,67 @@ +# The frequency to save trained models when training. +save_step: 500 +# The frequency to fetch and print output when training. +print_step: 10 + +# The directory for saving model +save_model: "checkpoints" +# The directory for saving inference model +inference_model_dir: "infer_model" +# Set seed for CE or debug +random_seed: 1024 + +# The data type of input ids. +input_dtype: "int64" + +# Device to use. +device: "gpu" + +# TODO fix +#batch_size: 2000 +batch_size: 100 + +infer_batch_size: 1500 +shuffle_batch: False +# Data shuffle only works when sort_type is pool or none +shuffle: False +# shuffle_seed must be set when shuffle is True and using multi-cards to train. +# Otherwise, the number of batches cannot be guaranteed. +shuffle_seed: 128 + +# The number of epoches for training +epoch: 50 + + +#learning_rate: 0.00005 +learning_rate: 0.00003 + + +beta1: 0.9 +beta2: 0.997 +eps: 1e-9 +# The parameters for learning rate scheduling. +warmup_steps: 1000 + +# Dropout rates. +dropout: 0.1 + + +# Mixed precision training +use_amp: True +use_pure_fp16: False +scale_loss: 128.0 + +# Maximum iteration for training. +max_iter: None + +do_train: True + +max_text_seqlen: 48 +vocab_file: "./packages/ernie_base_3.0/vocab.txt" +text_model_config: "./packages/ernie_base_3.0/ernie_config.base.json" + +pad_token: 0 +cls_token: 1 +sep_token: 2 +mask_token: 3 +unk_token: 17963 diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/ernie_base_3.0/ernie_config.base.json b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/ernie_base_3.0/ernie_config.base.json new file mode 100755 index 000000000..7cced9237 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/packages/ernie_base_3.0/ernie_config.base.json @@ -0,0 +1,13 @@ +{ + "attention_probs_dropout_prob": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "max_position_embeddings": 2048, + "num_attention_heads": 12, + "num_hidden_layers": 12, + "sent_type_vocab_size": 4, + "task_type_vocab_size": 3, + "vocab_size": 40000 + } From e5e279f9365215e35d1c66335c018824d923497d Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:20:16 +0800 Subject: [PATCH 031/117] add ernie_vilg module --- .../image/text_to_image/ernie_vilg/README.md | 104 ++++++++ .../image/text_to_image/ernie_vilg/module.py | 230 ++++++++++++++++++ .../text_to_image/ernie_vilg/requirements.txt | 2 + 3 files changed, 336 insertions(+) create mode 100644 modules/image/text_to_image/ernie_vilg/README.md create mode 100644 modules/image/text_to_image/ernie_vilg/module.py create mode 100644 modules/image/text_to_image/ernie_vilg/requirements.txt diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md new file mode 100644 index 000000000..c85e52d2b --- /dev/null +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -0,0 +1,104 @@ +# ernie_vilg + +|模型名称|ernie_vilg| +| :--- | :---: | +|类别|图像-文图生成| +|网络|ERNIE-ViLG| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|-| +|最新更新日期|2022-08-02| 
+|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "宁静的小镇" 风格 "油画" + + - 输出图像 +

+ + +### 模型介绍 + +文心ERNIE-ViLG参数规模达到100亿,是目前为止全球最大规模中文跨模态生成模型,在文本生成图像、图像描述等跨模态生成任务上效果全球领先,在图文生成领域MS-COCO、COCO-CN、AIC-ICC等数据集上取得最好效果。你可以输入一段文本描述以及生成风格,模型就会根据输入的内容自动创作出符合要求的图像。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ernie_vilg + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run ernie_vilg --text_prompts "宁静的小镇" --output_dir ernie_vilg_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="ernie_vilg") + text_prompts = ["宁静的小镇"] + images = module.generate_image(text_prompts=text_prompts, output_dir='./ernie_vilg_out/') + ``` + +- ### 3、API + + - ```python + def __init__(ak: Optional[str]=None, sk: Optional[str]=None) + ``` + - 初始化模块,可自定义用于申请访问文心API的ak和sk。 + + - **参数** + - ak:(Optional[str]): 用于申请文心api使用token的ak,可不填。 + - sk:(Optional[str]): 用于申请文心api使用token的sk,可不填。 + + - ```python + def generate_image( + text_prompts:str, + style: Optional[str] = "油画", + topk: Optional[int] = 10, + output_dir: Optional[str] = 'ernievilg_output') + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。 + - style(Optional[str]): 生成图像的风格,当前支持'油画','水彩','粉笔画','卡通','儿童画','蜡笔画'。 + - topk(Optional[int]): 保存前多少张图,最多保存10张。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"ernievilg_output"。 + + + - **返回** + - images(List(PIL.Image)): 返回生成的所有图像列表,PIL的Image格式。 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install ernie_vilg == 1.0.0 + ``` diff --git a/modules/image/text_to_image/ernie_vilg/module.py b/modules/image/text_to_image/ernie_vilg/module.py new file mode 100644 index 000000000..7af5abb0c --- /dev/null +++ b/modules/image/text_to_image/ernie_vilg/module.py @@ -0,0 +1,230 @@ +import argparse +import ast +import os +import re +import sys +import time +from functools import partial +from io import BytesIO +from typing import List +from typing import Optional + +import requests +from PIL import Image +from tqdm.auto import tqdm + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="ernie_vilg", + version="1.0.0", + type="image/text_to_image", + summary="", + author="baidu-nlp", + author_email="paddle-dev@baidu.com") +class ErnieVilG: + + def __init__(self, ak=None, sk=None): + """ + :param ak: ak for applying token to request wenxin api. + :param sk: sk for applying token to request wenxin api. 
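+        If both ak and sk are left as None, a built-in pair of demo credentials is used to request the access token.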
+ """ + if ak is None or sk is None: + self.ak = 'G26BfAOLpGIRBN5XrOV2eyPA25CE01lE' + self.sk = 'txLZOWIjEqXYMU3lSm05ViW4p9DWGOWs' + else: + self.ak = ak + self.sk = sk + self.token_host = 'https://wenxin.baidu.com/younger/portal/api/oauth/token' + self.token = self._apply_token(self.ak, self.sk) + + def _apply_token(self, ak, sk): + if ak is None or sk is None: + ak = self.ak + sk = self.sk + response = requests.get(self.token_host, + params={ + 'grant_type': 'client_credentials', + 'client_id': ak, + 'client_secret': sk + }) + if response: + res = response.json() + if res['code'] != 0: + print('Request access token error.') + raise RuntimeError("Request access token error.") + else: + print('Request access token error.') + raise RuntimeError("Request access token error.") + return res['data'] + + def generate_image(self, + text_prompts, + style: Optional[str] = "油画", + topk: Optional[int] = 10, + output_dir: Optional[str] = 'ernievilg_output'): + """ + Create image by text prompts using ErnieVilG model. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. + :param style: Image stype, currently supported 油画、水彩、粉笔画、卡通、儿童画、蜡笔画 + :param topk: Top k images to save. + :output_dir: Output directory + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + token = self.token + create_url = 'https://wenxin.baidu.com/younger/portal/api/rest/1.0/ernievilg/v1/txt2img?from=paddlehub' + get_url = 'https://wenxin.baidu.com/younger/portal/api/rest/1.0/ernievilg/v1/getImg?from=paddlehub' + if isinstance(text_prompts, str): + text_prompts = [text_prompts] + taskids = [] + for text_prompt in text_prompts: + res = requests.post(create_url, + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + data={ + 'access_token': token, + "text": text_prompt, + "style": style + }) + res = res.json() + if res['code'] == 4001: + print('请求参数错误') + raise RuntimeError("请求参数错误") + elif res['code'] == 4002: + print('请求参数格式错误,请检查必传参数是否齐全,参数类型等') + raise RuntimeError("请求参数格式错误,请检查必传参数是否齐全,参数类型等") + elif res['code'] == 4003: + print('请求参数中,图片风格不在可选范围内') + raise RuntimeError("请求参数中,图片风格不在可选范围内") + elif res['code'] == 4004: + print('API服务内部错误,可能引起原因有请求超时、模型推理错误等') + raise RuntimeError("API服务内部错误,可能引起原因有请求超时、模型推理错误等") + elif res['code'] == 100 or res['code'] == 110 or res['code'] == 111: + token = self._apply_token(self.ak, self.sk) + res = requests.post(create_url, + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + data={ + 'access_token': token, + "text": text_prompt, + "style": style + }) + res = res.json() + if res['code'] != 0: + print("Token失效重新请求后依然发生错误,请检查输入的参数") + raise RuntimeError("Token失效重新请求后依然发生错误,请检查输入的参数") + + taskids.append(res['data']["taskId"]) + + start_time = time.time() + process_bar = tqdm(total=100, unit='%') + results = {} + first_iter = True + while True: + if not taskids: + break + total_time = 0 + has_done = [] + for taskid in taskids: + res = requests.post(get_url, + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + data={ + 'access_token': token, + 'taskId': {taskid} + }) + res = res.json() + if res['code'] == 4001: + print('请求参数错误') + raise RuntimeError("请求参数错误") + elif res['code'] == 4002: + print('请求参数格式错误,请检查必传参数是否齐全,参数类型等') + raise RuntimeError("请求参数格式错误,请检查必传参数是否齐全,参数类型等") + elif res['code'] == 4003: + print('请求参数中,图片风格不在可选范围内') + raise RuntimeError("请求参数中,图片风格不在可选范围内") + elif res['code'] == 4004: + print('API服务内部错误,可能引起原因有请求超时、模型推理错误等') + raise 
RuntimeError("API服务内部错误,可能引起原因有请求超时、模型推理错误等") + elif res['code'] == 100 or res['code'] == 110 or res['code'] == 111: + token = self._apply_token(self.ak, self.sk) + res = requests.post(get_url, + headers={'Content-Type': 'application/x-www-form-urlencoded'}, + data={ + 'access_token': token, + 'taskId': {taskid} + }) + res = res.json() + if res['code'] != 0: + print("Token失效重新请求后依然发生错误,请检查输入的参数") + raise RuntimeError("Token失效重新请求后依然发生错误,请检查输入的参数") + if res['data']['status'] == 1: + has_done.append(res['data']['taskId']) + results[res['data']['text']] = { + 'imgUrls': res['data']['imgUrls'], + 'waiting': res['data']['waiting'], + 'taskId': res['data']['taskId'] + } + total_time = int(re.match('[0-9]+', str(res['data']['waiting'])).group(0)) * 60 + end_time = time.time() + progress_rate = int(((end_time - start_time) / total_time * 100)) if total_time != 0 else 100 + if progress_rate > process_bar.n: + increase_rate = progress_rate - process_bar.n + if progress_rate >= 100: + increase_rate = 100 - process_bar.n + else: + increase_rate = 0 + process_bar.update(increase_rate) + time.sleep(5) + for taskid in has_done: + taskids.remove(taskid) + print('Saving Images...') + result_images = [] + for text, data in results.items(): + for idx, imgdata in enumerate(data['imgUrls']): + image = Image.open(BytesIO(requests.get(imgdata['image']).content)) + image.save(os.path.join(output_dir, '{}_{}.png'.format(text, idx))) + result_images.append(image) + if idx + 1 >= topk: + break + print('Done') + return result_images + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.ak is not None and args.sk is not None: + self.ak = args.ak + self.sk = args.sk + self.token = self._apply_token(self.ak, self.sk) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + topk=args.topk, + output_dir=args.output_dir) + return results + + def add_module_input_arg(self): + """ + Add the command input options. 
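+        Registered options: --text_prompts, --style, --topk, --ak, --sk and --output_dir.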
+ """ + self.arg_input_group.add_argument('--text_prompts', type=str) + self.arg_input_group.add_argument('--style', + type=str, + default='油画', + choices=['油画', '水彩', '粉笔画', '卡通', '儿童画', '蜡笔画'], + help="绘画风格") + self.arg_input_group.add_argument('--topk', type=int, default=10, help="选取保存前多少张图,最多10张") + self.arg_input_group.add_argument('--ak', type=str, default=None, help="申请文心api使用token的ak") + self.arg_input_group.add_argument('--sk', type=str, default=None, help="申请文心api使用token的sk") + self.arg_input_group.add_argument('--output_dir', type=str, default='ernievilg_output') diff --git a/modules/image/text_to_image/ernie_vilg/requirements.txt b/modules/image/text_to_image/ernie_vilg/requirements.txt new file mode 100644 index 000000000..5bb8c66c6 --- /dev/null +++ b/modules/image/text_to_image/ernie_vilg/requirements.txt @@ -0,0 +1,2 @@ +requests +tqdm From c1cd06b233c805b44e23170906ea8ea1c90baeda Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:20:32 +0800 Subject: [PATCH 032/117] add disco_diffusion_clip_rn50 module --- .../disco_diffusion_clip_rn50/README.md | 114 ++ .../disco_diffusion_clip_rn50/clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../clip/clip/model.py | 227 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../clip/clip/utils.py | 122 ++ .../disco_diffusion_clip_rn50/module.py | 441 ++++++ .../requirements.txt | 8 + .../resize_right/README.md | 3 + .../resize_right/__init__.py | 0 .../resize_right/interp_methods.py | 70 + .../resize_right/resize_right.py | 403 ++++++ .../reverse_diffusion/README.md | 2 + .../reverse_diffusion/__init__.py | 156 +++ .../reverse_diffusion/config.py | 77 ++ .../reverse_diffusion/helper.py | 137 ++ .../reverse_diffusion/model/__init__.py | 3 + .../model/gaussian_diffusion.py | 1214 +++++++++++++++++ .../reverse_diffusion/model/losses.py | 86 ++ .../reverse_diffusion/model/make_cutouts.py | 177 +++ .../reverse_diffusion/model/nn.py | 127 ++ .../reverse_diffusion/model/perlin_noises.py | 78 ++ .../reverse_diffusion/model/respace.py | 123 ++ .../reverse_diffusion/model/script_util.py | 201 +++ .../reverse_diffusion/model/sec_diff.py | 135 ++ .../reverse_diffusion/model/transforms.py | 757 ++++++++++ .../reverse_diffusion/model/unet.py | 838 ++++++++++++ .../reverse_diffusion/resources/default.yml | 47 + .../resources/docstrings.yml | 103 ++ .../reverse_diffusion/runner.py | 285 ++++ 31 files changed, 6254 insertions(+) create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn50/README.md create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/layers.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/model.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/module.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/requirements.txt create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/__init__.py create mode 100755 
modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/interp_methods.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/resize_right.py create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/config.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/helper.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/gaussian_diffusion.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/losses.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/make_cutouts.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/nn.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/perlin_noises.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/respace.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/script_util.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/sec_diff.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/transforms.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/unet.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/default.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/docstrings.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/runner.py diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md new file mode 100644 index 000000000..a3a6733de --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md @@ -0,0 +1,114 @@ +# disco_diffusion_clip_rn50 + +|模型名称|disco_diffusion_clip_rn50| +| :--- | :---: | +|类别|图像-文图生成| +|网络|dd+clip ResNet50| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|2.8GB| +|最新更新日期|2022-08-02| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - 输出图像 +

+ + - 生成过程 +

+ +### 模型介绍 + +disco_diffusion_clip_rn50 是一个文图生成模型,可以通过输入一段文字来生成符合该句子语义的图像。该模型由两部分组成,一部分是扩散模型,是一种生成模型,可以从噪声输入中重建出原始图像。另一部分是多模态预训练模型(CLIP), 可以将文本和图像表示在同一个特征空间,相近语义的文本和图像在该特征空间里距离会更相近。在该文图生成模型中,扩散模型负责从初始噪声或者指定初始图像中来生成目标图像,CLIP负责引导生成图像的语义和输入的文本的语义尽可能接近,随着扩散模型在CLIP的引导下不断的迭代生成新图像,最终能够生成文本所描述内容的图像。该模块中使用的CLIP模型结构为ResNet50。 + +更多详情请参考论文:[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) 以及 [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install disco_diffusion_clip_rn50 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run disco_diffusion_clip_rn50 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_rn50_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_rn50") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # 生成图像, 默认会在disco_diffusion_clip_rn50_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_rn50_out/') + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_rn50_out-result.png') + # 展示所有的中间结果 + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_rn50_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation."。prompt的构造可以参考[网站](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#)。 + - style(Optional[str]): 指定绘画的风格,如'watercolor','Chinese painting'等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如Greg Rutkowsk、krenz,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"disco_diffusion_clip_rn50_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 
四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install disco_diffusion_clip_rn50 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/README.md new file mode 100644 index 000000000..317214d80 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We copy this repo here for guided diffusion. diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/layers.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + 
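+# Editorial sketch (not part of the original module): minimal usage of the attention block above,
+# assuming the [max_len, batch_size, emb_dim] tensor layout used throughout this file.
+#
+#     attn = MultiHeadAttention(emb_dim=64, num_heads=8)
+#     x = paddle.rand([10, 2, 64])   # [max_len, batch_size, emb_dim]
+#     out = attn(x)                  # same shape as the input: [10, 2, 64]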
+ +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/model.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/model.py new file mode 100755 index 000000000..63d1835c5 --- 
/dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/model.py @@ -0,0 +1,227 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = 
paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + # print(x.shape) + + x = x + self.positional_embedding + #print(x.shape) + + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + 
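+        # logit_scale is a learnable scalar kept in log space; exp() turns it into a positive
+        # temperature applied to the similarity logits below.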
logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
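+    For example, get_pairs(('l', 'o', 'w')) returns {('l', 'o'), ('o', 'w')}.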
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py new file mode 100755 index 000000000..979784682 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py @@ -0,0 +1,122 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['RN50', 'RN101', 
'VIT32'] + +URL = { + 'RN50': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN50.pdparams'), + 'RN101': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN101.pdparams'), + 'VIT32': os.path.join(os.path.dirname(__file__), 'pre_trained', 'ViT-B-32.pdparams') +} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='RN50'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'RN101': build_rn101_model, 'VIT32': build_vit_model, 'RN50': build_rn50_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + model.load_dict(sd) + model.eval() + return model + + +def build_vit_model(): + + model = CLIP(embed_dim=512, + image_resolution=224, + vision_layers=12, + vision_width=768, + vision_patch_size=32, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn101_model(): + model = CLIP( + embed_dim=512, + image_resolution=224, + vision_layers=(3, 4, 23, 3), + vision_width=64, + vision_patch_size=0, #Not used in resnet + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn50_model(): + model = CLIP(embed_dim=1024, + image_resolution=224, + vision_layers=(3, 4, 6, 3), + vision_width=64, + vision_patch_size=None, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py new file mode 100755 index 000000000..4b681525b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py @@ -0,0 +1,441 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import ast +import os +import sys +from functools import partial +from typing import List +from typing import Optional + +import disco_diffusion_clip_rn50.clip as clip +import disco_diffusion_clip_rn50.resize_right as resize_right +import paddle +from disco_diffusion_clip_rn50.reverse_diffusion import create + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="disco_diffusion_clip_rn50", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class DiscoDiffusionClip: + + def generate_image(self, + text_prompts: [str], + style: Optional[str] = None, + artist: Optional[str] = None, + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 0, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 0, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 1, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + use_gpu: Optional[bool] = True, + output_dir: Optional[str] = 'disco_diffusion_clip_rn50_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. 
These other apps use different technologies, but many of the same principles apply. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. 
However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. 
Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. 
Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param use_gpu: whether to use gpu or not. + :return: a DocumentArray object that has `n_batches` Documents + """ + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + elif isinstance(text_prompts, list): + text_prompts[0] = text_prompts[0].rstrip(',.,。') + if style is not None: + text_prompts[0] += ",{}".format(style) + if artist is not None: + text_prompts[0] += ",{},trending on artstation".format(artist) + + return create(text_prompts=text_prompts, + init_image=init_image, + width_height=width_height, + skip_steps=skip_steps, + steps=steps, + cut_ic_pow=cut_ic_pow, + init_scale=init_scale, + clip_guidance_scale=clip_guidance_scale, + tv_scale=tv_scale, + range_scale=range_scale, + sat_scale=sat_scale, + cutn_batches=cutn_batches, + diffusion_sampling_mode=diffusion_sampling_mode, + perlin_init=perlin_init, + perlin_mode=perlin_mode, + seed=seed, + eta=eta, + clamp_grad=clamp_grad, + clamp_max=clamp_max, + randomize_class=randomize_class, + clip_denoised=clip_denoised, + fuzzy_prompt=fuzzy_prompt, + rand_mag=rand_mag, + cut_overview=cut_overview, + cut_innercut=cut_innercut, + cut_icgray_p=cut_icgray_p, + display_rate=display_rate, + n_batches=n_batches, + batch_size=batch_size, + batch_name=batch_name, + clip_models=['RN50'], + output_dir=output_dir) + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = [] + for text_prompt in text_prompts: + result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() + results.append(result) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + init_image=args.init_image, + width_height=args.width_height, + skip_steps=args.skip_steps, + steps=args.steps, + cut_ic_pow=args.cut_ic_pow, + init_scale=args.init_scale, + clip_guidance_scale=args.clip_guidance_scale, + tv_scale=args.tv_scale, + range_scale=args.range_scale, + sat_scale=args.sat_scale, + cutn_batches=args.cutn_batches, + diffusion_sampling_mode=args.diffusion_sampling_mode, + perlin_init=args.perlin_init, + perlin_mode=args.perlin_mode, + seed=args.seed, + eta=args.eta, + clamp_grad=args.clamp_grad, + clamp_max=args.clamp_max, + randomize_class=args.randomize_class, + clip_denoised=args.clip_denoised, + fuzzy_prompt=args.fuzzy_prompt, + rand_mag=args.rand_mag, + cut_overview=args.cut_overview, + cut_innercut=args.cut_innercut, + cut_icgray_p=args.cut_icgray_p, + display_rate=args.display_rate, + n_batches=args.n_batches, + batch_size=args.batch_size, + batch_name=args.batch_name, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_input_group.add_argument( + '--skip_steps', + type=int, + default=0, + help= + 'Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15%% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50%% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture' + ) + self.arg_input_group.add_argument( + '--steps', + type=int, + default=250, + help= + "When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time." + ) + self.arg_input_group.add_argument( + '--cut_ic_pow', + type=int, + default=1, + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--init_scale', + type=int, + default=1000, + help= + "This controls how strongly CLIP will try to match the init_image provided. 
This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost." + ) + self.arg_input_group.add_argument( + '--clip_guidance_scale', + type=int, + default=5000, + help= + "CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well." + ) + self.arg_input_group.add_argument( + '--tv_scale', + type=int, + default=0, + help= + "Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising" + ) + self.arg_input_group.add_argument( + '--range_scale', + type=int, + default=0, + help= + "Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images." + ) + self.arg_input_group.add_argument( + '--sat_scale', + type=int, + default=0, + help= + "Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation." + ) + self.arg_input_group.add_argument( + '--cutn_batches', + type=int, + default=4, + help= + "Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. 
DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below." + ) + self.arg_input_group.add_argument( + '--diffusion_sampling_mode', + type=str, + default='ddim', + help= + "Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord." + ) + self.arg_input_group.add_argument( + '--perlin_init', + type=bool, + default=False, + help= + "Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively." + ) + self.arg_input_group.add_argument( + '--perlin_mode', + type=str, + default='mixed', + help= + "sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects." + ) + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical." + ) + self.arg_input_group.add_argument( + '--eta', + type=float, + default=0.8, + help= + "eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects." 
+ ) + self.arg_input_group.add_argument( + '--clamp_grad', + type=bool, + default=True, + help= + "As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced." + ) + self.arg_input_group.add_argument( + '--clamp_max', + type=float, + default=0.05, + help= + "Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy." + ) + self.arg_input_group.add_argument('--randomize_class', type=bool, default=True, help="Random class.") + self.arg_input_group.add_argument('--clip_denoised', type=bool, default=False, help="Clip denoised.") + self.arg_input_group.add_argument( + '--fuzzy_prompt', + type=bool, + default=False, + help= + "Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this." + ) + self.arg_input_group.add_argument( + '--rand_mag', + type=float, + default=0.5, + help="Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.") + self.arg_input_group.add_argument('--cut_overview', + type=str, + default='[12]*400+[4]*600', + help="The schedule of overview cuts") + self.arg_input_group.add_argument('--cut_innercut', + type=str, + default='[4]*400+[12]*600', + help="The schedule of inner cuts") + self.arg_input_group.add_argument( + '--cut_icgray_p', + type=str, + default='[0.2]*400+[0]*600', + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help= + "During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly." + ) + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='disco_diffusion_clip_rn50_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument( + '--init_image', + type=str, + default=None, + help= + "Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion." + ) + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[1280, 768], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--n_batches', + type=int, + default=1, + help= + "This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings." + ) + self.arg_input_group.add_argument('--batch_size', type=int, default=1, help="Batch size.") + self.arg_input_group.add_argument( + '--batch_name', + type=str, + default='', + help= + 'The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name.' 
+ ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/requirements.txt b/modules/image/text_to_image/disco_diffusion_clip_rn50/requirements.txt new file mode 100755 index 000000000..8b4bc0ea4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/requirements.txt @@ -0,0 +1,8 @@ +numpy +paddle_lpips==0.1.2 +ftfy +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/README.md new file mode 100644 index 000000000..1f8d0bb0a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/README.md @@ -0,0 +1,3 @@ +# ResizeRight (Paddle) +Fully differentiable resize function implemented by Paddle. +This module is based on [assafshocher/ResizeRight](https://github.com/assafshocher/ResizeRight). diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/interp_methods.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/interp_methods.py new file mode 100755 index 000000000..276eb055a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/interp_methods.py @@ -0,0 +1,70 @@ +from math import pi + +try: + import paddle +except ImportError: + paddle = None + +try: + import numpy + import numpy as np +except ImportError: + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def set_framework_dependencies(x): + if type(x) is numpy.ndarray: + to_dtype = lambda a: a + fw = numpy + else: + to_dtype = lambda a: paddle.cast(a, x.dtype) + fw = paddle + # eps = fw.finfo(fw.float32).eps + eps = paddle.to_tensor(np.finfo(np.float32).eps) + return fw, to_dtype, eps + + +def support_sz(sz): + + def wrapper(f): + f.support_sz = sz + return f + + return wrapper + + +@support_sz(4) +def cubic(x): + fw, to_dtype, eps = set_framework_dependencies(x) + absx = fw.abs(x) + absx2 = absx**2 + absx3 = absx**3 + return ((1.5 * absx3 - 2.5 * absx2 + 1.) * to_dtype(absx <= 1.) + + (-0.5 * absx3 + 2.5 * absx2 - 4. * absx + 2.) * to_dtype((1. 
< absx) & (absx <= 2.))) + + +@support_sz(4) +def lanczos2(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 2) + eps) / ((pi**2 * x**2 / 2) + eps)) * to_dtype(abs(x) < 2)) + + +@support_sz(6) +def lanczos3(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 3) + eps) / ((pi**2 * x**2 / 3) + eps)) * to_dtype(abs(x) < 3)) + + +@support_sz(2) +def linear(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return ((x + 1) * to_dtype((-1 <= x) & (x < 0)) + (1 - x) * to_dtype((0 <= x) & (x <= 1))) + + +@support_sz(1) +def box(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return to_dtype((-1 <= x) & (x < 0)) + to_dtype((0 <= x) & (x <= 1)) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/resize_right.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/resize_right.py new file mode 100755 index 000000000..4f6cb94a8 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/resize_right/resize_right.py @@ -0,0 +1,403 @@ +import warnings +from fractions import Fraction +from math import ceil +from typing import Tuple + +import disco_diffusion_clip_rn50.resize_right.interp_methods as interp_methods + + +class NoneClass: + pass + + +try: + import paddle + from paddle import nn + nnModuleWrapped = nn.Layer +except ImportError: + warnings.warn('No PyTorch found, will work only with Numpy') + paddle = None + nnModuleWrapped = NoneClass + +try: + import numpy + import numpy as np +except ImportError: + warnings.warn('No Numpy found, will work only with PyTorch') + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def resize(input, + scale_factors=None, + out_shape=None, + interp_method=interp_methods.cubic, + support_sz=None, + antialiasing=True, + by_convs=False, + scale_tolerance=None, + max_numerator=10, + pad_mode='constant'): + # get properties of the input tensor + in_shape, n_dims = input.shape, input.ndim + + # fw stands for framework that can be either numpy or paddle, + # determined by the input type + fw = numpy if type(input) is numpy.ndarray else paddle + eps = np.finfo(np.float32).eps if fw == numpy else paddle.to_tensor(np.finfo(np.float32).eps) + device = input.place if fw is paddle else None + + # set missing scale factors or output shapem one according to another, + # scream if both missing. this is also where all the defults policies + # take place. also handling the by_convs attribute carefully. + scale_factors, out_shape, by_convs = set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, + scale_tolerance, max_numerator, eps, fw) + + # sort indices of dimensions according to scale of each dimension. + # since we are going dim by dim this is efficient + sorted_filtered_dims_and_scales = [(dim, scale_factors[dim], by_convs[dim], in_shape[dim], out_shape[dim]) + for dim in sorted(range(n_dims), key=lambda ind: scale_factors[ind]) + if scale_factors[dim] != 1.] 
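+    # (illustrative example: for an input of shape (3, 256, 256) with
+    #  scale_factors=0.5, only the two spatial dims appear here, each with
+    #  scale 0.5, and they are resized one after the other)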
+ # unless support size is specified by the user, it is an attribute + # of the interpolation method + if support_sz is None: + support_sz = interp_method.support_sz + + # output begins identical to input and changes with each iteration + output = input + + # iterate over dims + for (dim, scale_factor, dim_by_convs, in_sz, out_sz) in sorted_filtered_dims_and_scales: + # STEP 1- PROJECTED GRID: The non-integer locations of the projection + # of output pixel locations to the input tensor + projected_grid = get_projected_grid(in_sz, out_sz, scale_factor, fw, dim_by_convs, device) + + # STEP 1.5: ANTIALIASING- If antialiasing is taking place, we modify + # the window size and the interpolation method (see inside function) + cur_interp_method, cur_support_sz = apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, + antialiasing) + + # STEP 2- FIELDS OF VIEW: for each output pixels, map the input pixels + # that influence it. Also calculate needed padding and update grid + # accoedingly + field_of_view = get_field_of_view(projected_grid, cur_support_sz, fw, eps, device) + + # STEP 2.5- CALCULATE PAD AND UPDATE: according to the field of view, + # the input should be padded to handle the boundaries, coordinates + # should be updated. actual padding only occurs when weights are + # aplied (step 4). if using by_convs for this dim, then we need to + # calc right and left boundaries for each filter instead. + pad_sz, projected_grid, field_of_view = calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, + dim_by_convs, fw, device) + # STEP 3- CALCULATE WEIGHTS: Match a set of weights to the pixels in + # the field of view for each output pixel + weights = get_weights(cur_interp_method, projected_grid, field_of_view) + + # STEP 4- APPLY WEIGHTS: Each output pixel is calculated by multiplying + # its set of weights with the pixel values in its field of view. + # We now multiply the fields of view with their matching weights. + # We do this by tensor multiplication and broadcasting. + # if by_convs is true for this dim, then we do this action by + # convolutions. this is equivalent but faster. + if not dim_by_convs: + output = apply_weights(output, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw) + else: + output = apply_convs(output, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw) + return output + + +def get_projected_grid(in_sz, out_sz, scale_factor, fw, by_convs, device=None): + # we start by having the ouput coordinates which are just integer locations + # in the special case when usin by_convs, we only need two cycles of grid + # points. the first and last. + grid_sz = out_sz if not by_convs else scale_factor.numerator + out_coordinates = fw_arange(grid_sz, fw, device) + + # This is projecting the ouput pixel locations in 1d to the input tensor, + # as non-integer locations. + # the following fomrula is derived in the paper + # "From Discrete to Continuous Convolutions" by Shocher et al. + return (out_coordinates / float(scale_factor) + (in_sz - 1) / 2 - (out_sz - 1) / (2 * float(scale_factor))) + + +def get_field_of_view(projected_grid, cur_support_sz, fw, eps, device): + # for each output pixel, map which input pixels influence it, in 1d. 
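+    # (e.g. with the cubic kernel and a 0.5x downscale, cur_support_sz is
+    #  4 / 0.5 = 8, so every output pixel gathers 8 input neighbors)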
+ # we start by calculating the leftmost neighbor, using half of the window + # size (eps is for when boundary is exact int) + left_boundaries = fw_ceil(projected_grid - cur_support_sz / 2 - eps, fw) + + # then we simply take all the pixel centers in the field by counting + # window size pixels from the left boundary + ordinal_numbers = fw_arange(ceil(cur_support_sz - eps), fw, device) + return left_boundaries[:, None] + ordinal_numbers + + +def calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, dim_by_convs, fw, device): + if not dim_by_convs: + # determine padding according to neighbor coords out of bound. + # this is a generalized notion of padding, when pad<0 it means crop + pad_sz = [-field_of_view[0, 0].item(), field_of_view[-1, -1].item() - in_sz + 1] + + # since input image will be changed by padding, coordinates of both + # field_of_view and projected_grid need to be updated + field_of_view += pad_sz[0] + projected_grid += pad_sz[0] + + else: + # only used for by_convs, to calc the boundaries of each filter the + # number of distinct convolutions is the numerator of the scale factor + num_convs, stride = scale_factor.numerator, scale_factor.denominator + + # calculate left and right boundaries for each conv. left can also be + # negative right can be bigger than in_sz. such cases imply padding if + # needed. however if# both are in-bounds, it means we need to crop, + # practically apply the conv only on part of the image. + left_pads = -field_of_view[:, 0] + + # next calc is tricky, explanation by rows: + # 1) counting output pixels between the first position of each filter + # to the right boundary of the input + # 2) dividing it by number of filters to count how many 'jumps' + # each filter does + # 3) multiplying by the stride gives us the distance over the input + # coords done by all these jumps for each filter + # 4) to this distance we add the right boundary of the filter when + # placed in its leftmost position. so now we get the right boundary + # of that filter in input coord. + # 5) the padding size needed is obtained by subtracting the rightmost + # input coordinate. if the result is positive padding is needed. if + # negative then negative padding means shaving off pixel columns. + right_pads = (((out_sz - fw_arange(num_convs, fw, device) - 1) # (1) + // num_convs) # (2) + * stride # (3) + + field_of_view[:, -1] # (4) + - in_sz + 1) # (5) + + # in the by_convs case pad_sz is a list of left-right pairs. one per + # each filter + + pad_sz = list(zip(left_pads, right_pads)) + + return pad_sz, projected_grid, field_of_view + + +def get_weights(interp_method, projected_grid, field_of_view): + # the set of weights per each output pixels is the result of the chosen + # interpolation method applied to the distances between projected grid + # locations and the pixel-centers in the field of view (distances are + # directed, can be positive or negative) + weights = interp_method(projected_grid[:, None] - field_of_view) + + # we now carefully normalize the weights to sum to 1 per each output pixel + sum_weights = weights.sum(1, keepdim=True) + sum_weights[sum_weights == 0] = 1 + return weights / sum_weights + + +def apply_weights(input, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw): + # for this operation we assume the resized dim is the first one. 
+ # so we transpose and will transpose back after multiplying + tmp_input = fw_swapaxes(input, dim, 0, fw) + + # apply padding + tmp_input = fw_pad(tmp_input, fw, pad_sz, pad_mode) + + # field_of_view is a tensor of order 2: for each output (1d location + # along cur dim)- a list of 1d neighbors locations. + # note that this whole operations is applied to each dim separately, + # this is why it is all in 1d. + # neighbors = tmp_input[field_of_view] is a tensor of order image_dims+1: + # for each output pixel (this time indicated in all dims), these are the + # values of the neighbors in the 1d field of view. note that we only + # consider neighbors along the current dim, but such set exists for every + # multi-dim location, hence the final tensor order is image_dims+1. + paddle.device.cuda.empty_cache() + neighbors = tmp_input[field_of_view] + + # weights is an order 2 tensor: for each output location along 1d- a list + # of weights matching the field of view. we augment it with ones, for + # broadcasting, so that when multiplies some tensor the weights affect + # only its first dim. + tmp_weights = fw.reshape(weights, (*weights.shape, *[1] * (n_dims - 1))) + + # now we simply multiply the weights with the neighbors, and then sum + # along the field of view, to get a single value per out pixel + tmp_output = (neighbors * tmp_weights).sum(1) + # we transpose back the resized dim to its original position + return fw_swapaxes(tmp_output, 0, dim, fw) + + +def apply_convs(input, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw): + # for this operations we assume the resized dim is the last one. + # so we transpose and will transpose back after multiplying + input = fw_swapaxes(input, dim, -1, fw) + + # the stride for all convs is the denominator of the scale factor + stride, num_convs = scale_factor.denominator, scale_factor.numerator + + # prepare an empty tensor for the output + tmp_out_shape = list(input.shape) + tmp_out_shape[-1] = out_sz + tmp_output = fw_empty(tuple(tmp_out_shape), fw, input.device) + + # iterate over the conv operations. we have as many as the numerator + # of the scale-factor. for each we need boundaries and a filter. + for conv_ind, (pad_sz, filt) in enumerate(zip(pad_sz, weights)): + # apply padding (we pad last dim, padding can be negative) + pad_dim = input.ndim - 1 + tmp_input = fw_pad(input, fw, pad_sz, pad_mode, dim=pad_dim) + + # apply convolution over last dim. store in the output tensor with + # positional strides so that when the loop is comlete conv results are + # interwind + tmp_output[..., conv_ind::num_convs] = fw_conv(tmp_input, filt, stride) + + return fw_swapaxes(tmp_output, -1, dim, fw) + + +def set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, scale_tolerance, max_numerator, eps, fw): + # eventually we must have both scale-factors and out-sizes for all in/out + # dims. 
however, we support many possible partial arguments + if scale_factors is None and out_shape is None: + raise ValueError("either scale_factors or out_shape should be " + "provided") + if out_shape is not None: + # if out_shape has less dims than in_shape, we defaultly resize the + # first dims for numpy and last dims for paddle + out_shape = (list(out_shape) + + list(in_shape[len(out_shape):]) if fw is numpy else list(in_shape[:-len(out_shape)]) + + list(out_shape)) + if scale_factors is None: + # if no scale given, we calculate it as the out to in ratio + # (not recomended) + scale_factors = [out_sz / in_sz for out_sz, in_sz in zip(out_shape, in_shape)] + if scale_factors is not None: + # by default, if a single number is given as scale, we assume resizing + # two dims (most common are images with 2 spatial dims) + scale_factors = (scale_factors if isinstance(scale_factors, (list, tuple)) else [scale_factors, scale_factors]) + # if less scale_factors than in_shape dims, we defaultly resize the + # first dims for numpy and last dims for paddle + scale_factors = (list(scale_factors) + [1] * (len(in_shape) - len(scale_factors)) if fw is numpy else [1] * + (len(in_shape) - len(scale_factors)) + list(scale_factors)) + if out_shape is None: + # when no out_shape given, it is calculated by multiplying the + # scale by the in_shape (not recomended) + out_shape = [ceil(scale_factor * in_sz) for scale_factor, in_sz in zip(scale_factors, in_shape)] + # next part intentionally after out_shape determined for stability + # we fix by_convs to be a list of truth values in case it is not + if not isinstance(by_convs, (list, tuple)): + by_convs = [by_convs] * len(out_shape) + + # next loop fixes the scale for each dim to be either frac or float. + # this is determined by by_convs and by tolerance for scale accuracy. + for ind, (sf, dim_by_convs) in enumerate(zip(scale_factors, by_convs)): + # first we fractionaize + if dim_by_convs: + frac = Fraction(1 / sf).limit_denominator(max_numerator) + frac = Fraction(numerator=frac.denominator, denominator=frac.numerator) + + # if accuracy is within tolerance scale will be frac. if not, then + # it will be float and the by_convs attr will be set false for + # this dim + if scale_tolerance is None: + scale_tolerance = eps + if dim_by_convs and abs(frac - sf) < scale_tolerance: + scale_factors[ind] = frac + else: + scale_factors[ind] = float(sf) + by_convs[ind] = False + + return scale_factors, out_shape, by_convs + + +def apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, antialiasing): + # antialiasing is "stretching" the field of view according to the scale + # factor (only for downscaling). this is low-pass filtering. this + # requires modifying both the interpolation (stretching the 1d + # function and multiplying by the scale-factor) and the window size. 
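+    # (e.g. for a 0.5x downscale with the cubic kernel, the kernel below is
+    #  evaluated at 0.5 * x and scaled by 0.5, and the support grows 4 -> 8)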
+    scale_factor = float(scale_factor)
+    if scale_factor >= 1.0 or not antialiasing:
+        return interp_method, support_sz
+    cur_interp_method = (lambda arg: scale_factor * interp_method(scale_factor * arg))
+    cur_support_sz = support_sz / scale_factor
+    return cur_interp_method, cur_support_sz
+
+
+def fw_ceil(x, fw):
+    if fw is numpy:
+        return fw.int_(fw.ceil(x))
+    else:
+        return paddle.cast(x.ceil(), dtype='int64')
+
+
+def fw_floor(x, fw):
+    if fw is numpy:
+        return fw.int_(fw.floor(x))
+    else:
+        return paddle.cast(x.floor(), dtype='int64')
+
+
+def fw_cat(x, fw):
+    if fw is numpy:
+        return fw.concatenate(x)
+    else:
+        return fw.concat(x)
+
+
+def fw_swapaxes(x, ax_1, ax_2, fw):
+    if fw is numpy:
+        return fw.swapaxes(x, ax_1, ax_2)
+    else:
+        if ax_1 == -1:
+            ax_1 = len(x.shape) - 1
+        if ax_2 == -1:
+            ax_2 = len(x.shape) - 1
+        perm0 = list(range(len(x.shape)))
+        temp = ax_1
+        perm0[temp] = ax_2
+        perm0[ax_2] = temp
+        return fw.transpose(x, perm0)
+
+
+def fw_pad(x, fw, pad_sz, pad_mode, dim=0):
+    if pad_sz == (0, 0):
+        return x
+    if fw is numpy:
+        pad_vec = [(0, 0)] * x.ndim
+        pad_vec[dim] = pad_sz
+        return fw.pad(x, pad_width=pad_vec, mode=pad_mode)
+    else:
+        if x.ndim < 3:
+            x = x[None, None, ...]
+
+        pad_vec = [0] * ((x.ndim - 2) * 2)
+        pad_vec[0:2] = pad_sz
+        return fw_swapaxes(fw.nn.functional.pad(fw_swapaxes(x, dim, -1, fw), pad=pad_vec, mode=pad_mode), dim, -1, fw)
+
+
+def fw_conv(input, filter, stride):
+    # we want to apply 1d conv to any nd array. the way to do it is to reshape
+    # the input to a 4D tensor. first two dims are singletons, 3rd dim stores
+    # all the spatial dims that we are not convolving along now. then we can
+    # apply conv2d with a 1xK filter. This convolves the same way all the other
+    # dims stored in the 3rd dim, like a depthwise conv over them.
+    # TODO: numpy support
+    reshaped_input = input.reshape(1, 1, -1, input.shape[-1])
+    reshaped_output = paddle.nn.functional.conv2d(reshaped_input, filter.view(1, 1, 1, -1), stride=(1, stride))
+    return reshaped_output.reshape(*input.shape[:-1], -1)
+
+
+def fw_arange(upper_bound, fw, device):
+    if fw is numpy:
+        return fw.arange(upper_bound)
+    else:
+        return fw.arange(upper_bound)
+
+
+def fw_empty(shape, fw, device):
+    if fw is numpy:
+        return fw.empty(shape)
+    else:
+        return fw.empty(shape=shape)
diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/README.md
new file mode 100644
index 000000000..711671bad
--- /dev/null
+++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/README.md
@@ -0,0 +1,2 @@
+# Diffusion model (Paddle)
+This module implements a diffusion model that accepts a text prompt and outputs images semantically close to the text. The code is rewritten in Paddle and mainly refers to two projects: [jina-ai/discoart](https://github.com/jina-ai/discoart) and [openai/guided-diffusion](https://github.com/openai/guided-diffusion). Thanks for their wonderful work.
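+
+A minimal usage sketch (illustrative only; the prompt and parameter values are examples, and `create` is the entry point defined in `__init__.py` below):
+
+    from disco_diffusion_clip_rn50.reverse_diffusion import create
+
+    # returns a DocumentArray holding the generated image(s)
+    da = create(text_prompts=['A beautiful painting of a lighthouse, Trending on artstation.'],
+                width_height=[1280, 768],
+                clip_models=['RN50'],
+                output_dir='discoart_output')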
diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/__init__.py new file mode 100755 index 000000000..39fc908dc --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/__init__.py @@ -0,0 +1,156 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/__init__.py +''' +import os +import warnings + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + +__all__ = ['create'] + +import sys + +__resources_path__ = os.path.join( + os.path.dirname(sys.modules.get(__package__).__file__ if __package__ in sys.modules else __file__), + 'resources', +) + +import gc + +# check if GPU is available +import paddle + +# download and load models, this will take some time on the first load + +from .helper import load_all_models, load_diffusion_model, load_clip_models + +model_config, secondary_model = load_all_models('512x512_diffusion_uncond_finetune_008100', use_secondary_model=True) + +from typing import TYPE_CHECKING, overload, List, Optional + +if TYPE_CHECKING: + from docarray import DocumentArray, Document + +_clip_models_cache = {} + +# begin_create_overload + + +@overload +def create(text_prompts: Optional[List[str]] = [ + 'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.', + 'yellow color scheme', +], + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 10, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 150, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_model: Optional[str] = '512x512_diffusion_uncond_finetune_008100', + use_secondary_model: Optional[bool] = True, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 4, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + clip_models: Optional[list] = ['ViTB32', 'ViTB16', 'RN50'], + output_dir: Optional[str] = 'discoart_output') -> 'DocumentArray': + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. 
Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. 
Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_model: Diffusion_model of choice. + :param use_secondary_model: Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. 
This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param clip_models: CLIP Model selectors. ViTB32, ViTB16, ViTL14, RN101, RN50, RN50x4, RN50x16, RN50x64.These various CLIP models are available for you to use during image generation. 
Models have different styles or ‘flavors,’ so look around. You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.The rough order of speed/mem usage is (smallest/fastest to largest/slowest):VitB32RN50RN101VitB16RN50x4RN50x16RN50x64ViTL14For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +# end_create_overload + + +@overload +def create(init_document: 'Document') -> 'DocumentArray': + """ + Create an artwork using a DocArray ``Document`` object as initial state. + :param init_document: its ``.tags`` will be used as parameters, ``.uri`` (if present) will be used as init image. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +def create(**kwargs) -> 'DocumentArray': + from .config import load_config + from .runner import do_run + + if 'init_document' in kwargs: + d = kwargs['init_document'] + _kwargs = d.tags + if not _kwargs: + warnings.warn('init_document has no .tags, fallback to default config') + if d.uri: + _kwargs['init_image'] = kwargs['init_document'].uri + else: + warnings.warn('init_document has no .uri, fallback to no init image') + kwargs.pop('init_document') + if kwargs: + warnings.warn('init_document has .tags and .uri, but kwargs are also present, will override .tags') + _kwargs.update(kwargs) + _args = load_config(user_config=_kwargs) + else: + _args = load_config(user_config=kwargs) + + model, diffusion = load_diffusion_model(model_config, _args.diffusion_model, steps=_args.steps) + + clip_models = load_clip_models(enabled=_args.clip_models, clip_models=_clip_models_cache) + + gc.collect() + paddle.device.cuda.empty_cache() + try: + return do_run(_args, (model, diffusion, clip_models, secondary_model)) + except KeyboardInterrupt: + pass diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/config.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/config.py new file mode 100755 index 000000000..0cbc71e6f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/config.py @@ -0,0 +1,77 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/config.py +''' +import copy +import random +import warnings +from types import SimpleNamespace +from typing import Dict + +import yaml +from yaml import Loader + +from . 
import __resources_path__ + +with open(f'{__resources_path__}/default.yml') as ymlfile: + default_args = yaml.load(ymlfile, Loader=Loader) + + +def load_config(user_config: Dict, ): + cfg = copy.deepcopy(default_args) + + if user_config: + cfg.update(**user_config) + + for k in user_config.keys(): + if k not in cfg: + warnings.warn(f'unknown argument {k}, ignored') + + for k, v in cfg.items(): + if k in ('batch_size', 'display_rate', 'seed', 'skip_steps', 'steps', 'n_batches', + 'cutn_batches') and isinstance(v, float): + cfg[k] = int(v) + if k == 'width_height': + cfg[k] = [int(vv) for vv in v] + + cfg.update(**{ + 'seed': cfg['seed'] or random.randint(0, 2**32), + }) + + if cfg['batch_name']: + da_name = f'{__package__}-{cfg["batch_name"]}-{cfg["seed"]}' + else: + da_name = f'{__package__}-{cfg["seed"]}' + warnings.warn('you did not set `batch_name`, set it to have unique session ID') + + cfg.update(**{'name_docarray': da_name}) + + print_args_table(cfg) + + return SimpleNamespace(**cfg) + + +def print_args_table(cfg): + from rich.table import Table + from rich import box + from rich.console import Console + + console = Console() + + param_str = Table( + title=cfg['name_docarray'], + box=box.ROUNDED, + highlight=True, + title_justify='left', + ) + param_str.add_column('Argument', justify='right') + param_str.add_column('Value', justify='left') + + for k, v in sorted(cfg.items()): + value = str(v) + + if not default_args.get(k, None) == v: + value = f'[b]{value}[/]' + + param_str.add_row(k, value) + + console.print(param_str) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/helper.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/helper.py new file mode 100755 index 000000000..2a4fa163e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/helper.py @@ -0,0 +1,137 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. 
+https://github.com/jina-ai/discoart/blob/main/discoart/helper.py
+'''
+import hashlib
+import logging
+import os
+import subprocess
+import sys
+from os.path import expanduser
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from typing import List
+
+import paddle
+
+
+def _get_logger():
+    logger = logging.getLogger(__package__)
+    logger.setLevel("INFO")
+    ch = logging.StreamHandler()
+    ch.setLevel("INFO")
+    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    ch.setFormatter(formatter)
+    logger.addHandler(ch)
+    return logger
+
+
+logger = _get_logger()
+
+
+def load_clip_models(enabled: List[str], clip_models: Dict[str, Any] = {}):
+
+    import disco_diffusion_clip_rn50.clip.clip as clip
+    from disco_diffusion_clip_rn50.clip.clip import build_model, tokenize, transform
+
+    # load enabled models
+    for k in enabled:
+        if k not in clip_models:
+            clip_models[k] = build_model(name=k)
+            clip_models[k].eval()
+            for parameter in clip_models[k].parameters():
+                parameter.stop_gradient = True
+
+    # drop models that are no longer enabled to save memory
+    # (iterate over a copy of the keys so that popping entries is safe)
+    for k in list(clip_models.keys()):
+        if k not in enabled:
+            clip_models.pop(k)
+
+    return list(clip_models.values())
+
+
+def load_all_models(diffusion_model, use_secondary_model):
+    from .model.script_util import (
+        model_and_diffusion_defaults, )
+
+    model_config = model_and_diffusion_defaults()
+
+    if diffusion_model == '512x512_diffusion_uncond_finetune_008100':
+        model_config.update({
+            'attention_resolutions': '32, 16, 8',
+            'class_cond': False,
+            'diffusion_steps': 1000,  # No need to edit this, it is taken care of later.
+            'rescale_timesteps': True,
+            'timestep_respacing': 250,  # No need to edit this, it is taken care of later.
+            'image_size': 512,
+            'learn_sigma': True,
+            'noise_schedule': 'linear',
+            'num_channels': 256,
+            'num_head_channels': 64,
+            'num_res_blocks': 2,
+            'resblock_updown': True,
+            'use_fp16': False,
+            'use_scale_shift_norm': True,
+        })
+    elif diffusion_model == '256x256_diffusion_uncond':
+        model_config.update({
+            'attention_resolutions': '32, 16, 8',
+            'class_cond': False,
+            'diffusion_steps': 1000,  # No need to edit this, it is taken care of later.
+            'rescale_timesteps': True,
+            'timestep_respacing': 250,  # No need to edit this, it is taken care of later.
+ 'image_size': 256, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + + secondary_model = None + if use_secondary_model: + from .model.sec_diff import SecondaryDiffusionImageNet2 + secondary_model = SecondaryDiffusionImageNet2() + model_dict = paddle.load( + os.path.join(os.path.dirname(__file__), 'pre_trained', 'secondary_model_imagenet_2.pdparams')) + secondary_model.set_state_dict(model_dict) + secondary_model.eval() + for parameter in secondary_model.parameters(): + parameter.stop_gradient = True + + return model_config, secondary_model + + +def load_diffusion_model(model_config, diffusion_model, steps): + from .model.script_util import ( + create_model_and_diffusion, ) + + timestep_respacing = f'ddim{steps}' + diffusion_steps = (1000 // steps) * steps if steps < 1000 else steps + model_config.update({ + 'timestep_respacing': timestep_respacing, + 'diffusion_steps': diffusion_steps, + }) + + model, diffusion = create_model_and_diffusion(**model_config) + model.set_state_dict( + paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', f'{diffusion_model}.pdparams'))) + model.eval() + for name, param in model.named_parameters(): + param.stop_gradient = True + + return model, diffusion + + +def parse_prompt(prompt): + if prompt.startswith('http://') or prompt.startswith('https://'): + vals = prompt.rsplit(':', 2) + vals = [vals[0] + ':' + vals[1], *vals[2:]] + else: + vals = prompt.rsplit(':', 1) + vals = vals + ['', '1'][len(vals):] + return vals[0], float(vals[1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/__init__.py new file mode 100755 index 000000000..466800666 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/__init__.py @@ -0,0 +1,3 @@ +""" +Codebase for "Improved Denoising Diffusion Probabilistic Models" implemented by Paddle. +""" diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/gaussian_diffusion.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/gaussian_diffusion.py new file mode 100755 index 000000000..86cd2c650 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/gaussian_diffusion.py @@ -0,0 +1,1214 @@ +""" +Diffusion model implemented by Paddle. +This code is rewritten based on Pytorch version of of Ho et al's diffusion models: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py +""" +import enum +import math + +import numpy as np +import paddle + +from .losses import discretized_gaussian_log_likelihood +from .losses import normal_kl +from .nn import mean_flat + + +def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): + """ + Get a pre-defined beta schedule for the given name. + + The beta schedule library consists of beta schedules which remain similar + in the limit of num_diffusion_timesteps. + Beta schedules may be added, but should not be removed or changed once + they are committed to maintain backwards compatibility. + """ + if schedule_name == "linear": + # Linear schedule from Ho et al, extended to work for any number of + # diffusion steps. 
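+        # The reference schedule is defined for 1000 steps with beta ranging linearly
+        # from 1e-4 to 0.02; rescaling both ends by 1000 / num_diffusion_timesteps keeps
+        # the total amount of noise injected over the whole chain roughly constant.
+        # For example, with num_diffusion_timesteps = 500 the betas run from 2e-4 to 0.04.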
+ scale = 1000 / num_diffusion_timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + elif schedule_name == "cosine": + return betas_for_alpha_bar( + num_diffusion_timesteps, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2)**2, + ) + else: + raise NotImplementedError(f"unknown beta schedule: {schedule_name}") + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +class ModelMeanType(enum.Enum): + """ + Which type of output the model predicts. + """ + + PREVIOUS_X = enum.auto() # the model predicts x_{t-1} + START_X = enum.auto() # the model predicts x_0 + EPSILON = enum.auto() # the model predicts epsilon + + +class ModelVarType(enum.Enum): + """ + What is used as the model's output variance. + + The LEARNED_RANGE option has been added to allow the model to predict + values between FIXED_SMALL and FIXED_LARGE, making its job easier. + """ + + LEARNED = enum.auto() + FIXED_SMALL = enum.auto() + FIXED_LARGE = enum.auto() + LEARNED_RANGE = enum.auto() + + +class LossType(enum.Enum): + MSE = enum.auto() # use raw MSE loss (and KL when learning variances) + RESCALED_MSE = (enum.auto()) # use raw MSE loss (with RESCALED_KL when learning variances) + KL = enum.auto() # use the variational lower-bound + RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB + + def is_vb(self): + return self == LossType.KL or self == LossType.RESCALED_KL + + +class GaussianDiffusion: + """ + Utilities for training and sampling diffusion models. + + Ported directly from here, and then adapted over time to further experimentation. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 + + :param betas: a 1-D numpy array of betas for each diffusion timestep, + starting at T and going to 1. + :param model_mean_type: a ModelMeanType determining what the model outputs. + :param model_var_type: a ModelVarType determining how variance is output. + :param loss_type: a LossType determining the loss function to use. + :param rescale_timesteps: if True, pass floating point timesteps into the + model so that they are always scaled like in the + original paper (0 to 1000). + """ + + def __init__( + self, + *, + betas, + model_mean_type, + model_var_type, + loss_type, + rescale_timesteps=False, + ): + self.model_mean_type = model_mean_type + self.model_var_type = model_var_type + self.loss_type = loss_type + self.rescale_timesteps = rescale_timesteps + + # Use float64 for accuracy. 
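+        # The arrays precomputed below implement the standard DDPM identities, with
+        # alpha_bar_t = prod_{s<=t} (1 - beta_s):
+        #   q(x_t | x_0)          = N(sqrt(alpha_bar_t) * x_0, (1 - alpha_bar_t) * I)
+        #   q(x_{t-1} | x_t, x_0) = N(posterior_mean_coef1 * x_0 + posterior_mean_coef2 * x_t,
+        #                             posterior_variance_t)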
+ betas = np.array(betas, dtype=np.float64) + self.betas = betas + assert len(betas.shape) == 1, "betas must be 1-D" + assert (betas > 0).all() and (betas <= 1).all() + + self.num_timesteps = int(betas.shape[0]) + + alphas = 1.0 - betas + self.alphas_cumprod = np.cumprod(alphas, axis=0) + self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) + self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) + assert self.alphas_cumprod_prev.shape == (self.num_timesteps, ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) + self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) + self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) + self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + self.posterior_variance = (betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + # log calculation clipped because the posterior variance is 0 at the + # beginning of the diffusion chain. + self.posterior_log_variance_clipped = np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:])) + self.posterior_mean_coef1 = (betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)) + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def q_sample(self, x_start, t, noise=None): + """ + Diffuse the data for a given number of diffusion steps. + + In other words, sample from q(x_t | x_0). + + :param x_start: the initial data batch. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :param noise: if specified, the split-out normal noise. + :return: A noisy version of x_start. 
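+        In closed form: x_t = sqrt(alpha_bar_t) * x_start + sqrt(1 - alpha_bar_t) * noise.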
+ """ + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + assert noise.shape == x_start.shape + return (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def q_posterior_mean_variance(self, x_start, x_t, t): + """ + Compute the mean and variance of the diffusion posterior: + + q(x_{t-1} | x_t, x_0) + + """ + assert x_start.shape == x_t.shape + posterior_mean = (_extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t) + posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + assert (posterior_mean.shape[0] == posterior_variance.shape[0] == posterior_log_variance_clipped.shape[0] == + x_start.shape[0]) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None): + """ + Apply the model to get p(x_{t-1} | x_t), as well as a prediction of + the initial x, x_0. + + :param model: the model, which takes a signal and a batch of timesteps + as input. + :param x: the [N x C x ...] tensor at time t. + :param t: a 1-D Tensor of timesteps. + :param clip_denoised: if True, clip the denoised signal into [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. Applies before + clip_denoised. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict with the following keys: + - 'mean': the model mean output. + - 'variance': the model variance output. + - 'log_variance': the log of 'variance'. + - 'pred_xstart': the prediction for x_0. + """ + if model_kwargs is None: + model_kwargs = {} + + B, C = x.shape[:2] + assert t.shape == [B] + model_output = model(x, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]: + assert model_output.shape == [B, C * 2, *x.shape[2:]] + model_output, model_var_values = paddle.split(model_output, 2, axis=1) + if self.model_var_type == ModelVarType.LEARNED: + model_log_variance = model_var_values + model_variance = paddle.exp(model_log_variance) + else: + min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape) + max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) + # The model_var_values is [-1, 1] for [min_var, max_var]. + frac = (model_var_values + 1) / 2 + model_log_variance = frac * max_log + (1 - frac) * min_log + model_variance = paddle.exp(model_log_variance) + else: + model_variance, model_log_variance = { + # for fixedlarge, we set the initial (log-)variance like so + # to get a better decoder log likelihood. 
+ ModelVarType.FIXED_LARGE: ( + np.append(self.posterior_variance[1], self.betas[1:]), + np.log(np.append(self.posterior_variance[1], self.betas[1:])), + ), + ModelVarType.FIXED_SMALL: ( + self.posterior_variance, + self.posterior_log_variance_clipped, + ), + }[self.model_var_type] + model_variance = _extract_into_tensor(model_variance, t, x.shape) + model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape) + + def process_xstart(x): + if denoised_fn is not None: + x = denoised_fn(x) + if clip_denoised: + return x.clamp(-1, 1) + return x + + if self.model_mean_type == ModelMeanType.PREVIOUS_X: + pred_xstart = process_xstart(self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)) + model_mean = model_output + elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: + if self.model_mean_type == ModelMeanType.START_X: + pred_xstart = process_xstart(model_output) + else: + pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)) + model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t) + else: + raise NotImplementedError(self.model_mean_type) + + assert (model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape) + return { + "mean": model_mean, + "variance": model_variance, + "log_variance": model_log_variance, + "pred_xstart": pred_xstart, + } + + def _predict_xstart_from_eps(self, x_t, t, eps): + assert x_t.shape == eps.shape + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps) + + def _predict_xstart_from_xprev(self, x_t, t, xprev): + assert x_t.shape == xprev.shape + return ( # (xprev - coef2*x_t) / coef1 + _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev - + _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t) + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + pred_xstart) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _scale_timesteps(self, t): + if self.rescale_timesteps: + return paddle.cast((t), 'float32') * (1000.0 / self.num_timesteps) + return t + + def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). + """ + gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_mean_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
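+        Concretely, the returned mean is p_mean_var["mean"] + p_mean_var["variance"] * cond_fn(x, t, ...),
+        i.e. the mean is shifted along the gradient of the conditional log-probability.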
+ """ + gradient = cond_fn(x, t, p_mean_var, **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, self._scale_timesteps(t), **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def condition_score_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, p_mean_var, **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def p_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. + :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"]} + + def p_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. 
+ :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean_with_grad(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"].detach()} + + def p_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model. + + :param model: the model module. + :param shape: the shape of the samples, (N, C, H, W). + :param noise: if specified, the noise from the encoder to sample. + Should be of the same shape as `shape`. + :param clip_denoised: if True, clip x_start predictions to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param device: if specified, the device to create the samples on. + If not specified, use a model parameter's device. + :param progress: if True, show a tqdm progress bar. + :return: a non-differentiable batch of samples. + """ + final = None + for sample in self.p_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def p_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model and yield intermediate samples from + each timestep of diffusion. + + Arguments are the same as p_sample_loop(). + Returns a generator over dicts, where each dict is the return value of + p_sample(). 
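+        For example, a caller can watch the intermediate denoising states like so (here
+        `diffusion` is a constructed GaussianDiffusion instance and `model` its denoising
+        network; the names are illustrative):
+
+            for out in diffusion.p_sample_loop_progressive(model, (1, 3, 256, 256)):
+                current = out["sample"]        # the partially denoised batch
+                estimate = out["pred_xstart"]  # the model's current estimate of x_0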
+ """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + sample_fn = self.p_sample_with_grad if cond_fn_with_grad else self.p_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + ) + yield out + img = out["sample"] + + def ddim_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"]} + + def ddim_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + out["pred_xstart"] = out["pred_xstart"].detach() + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"].detach()} + + def ddim_reverse_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t+1} from the model using DDIM reverse ODE. + """ + assert eta == 0.0, "Reverse ODE only for deterministic path" + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x - + out["pred_xstart"]) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape) + alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape) + + # Equation 12. reversed + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_next) + paddle.sqrt(1 - alpha_bar_next) * eps) + + return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]} + + def ddim_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model using DDIM. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.ddim_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + eta=eta, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def ddim_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Use DDIM to sample from the model and yield intermediate samples from + each timestep of DDIM. + + Same usage as p_sample_loop_progressive(). 
+ """ + # if device is None: + # device = next(model.parameters()).device + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0]) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint( + low=0, + high=model.num_classes, + shape=model_kwargs['y'].shape, + ) + sample_fn = self.ddim_sample_with_grad if cond_fn_with_grad else self.ddim_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + eta=eta, + ) + yield out + img = out["sample"] + + def plms_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + cond_fn_with_grad=False, + order=2, + old_out=None, + ): + """ + Sample x_{t-1} from the model using Pseudo Linear Multistep. + + Same usage as p_sample(). + """ + if not int(order) or not 1 <= order <= 4: + raise ValueError('order is invalid (should be int from 1-4).') + + def get_model_output(x, t): + with paddle.set_grad_enabled(cond_fn_with_grad and cond_fn is not None): + x = x.detach().requires_grad_() if cond_fn_with_grad else x + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + if cond_fn_with_grad: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + x = x.detach() + else: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + return eps, out, out_orig + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + eps, out, out_orig = get_model_output(x, t) + + if order > 1 and old_out is None: + # Pseudo Improved Euler + old_eps = [eps] + mean_pred = out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps + eps_2, _, _ = get_model_output(mean_pred, t - 1) + eps_prime = (eps + eps_2) / 2 + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + else: + # Pseudo Linear Multistep (Adams-Bashforth) + old_eps = old_out["old_eps"] + old_eps.append(eps) + cur_order = min(order, len(old_eps)) + if cur_order == 1: + eps_prime = old_eps[-1] + elif cur_order == 2: + eps_prime = (3 * old_eps[-1] - old_eps[-2]) / 2 + elif cur_order == 3: + eps_prime = (23 * old_eps[-1] - 16 * old_eps[-2] + 5 * old_eps[-3]) / 12 + elif cur_order == 4: + eps_prime = (55 * old_eps[-1] - 59 * old_eps[-2] + 37 * old_eps[-3] - 9 * old_eps[-4]) / 24 + else: + raise RuntimeError('cur_order is invalid.') + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + + if len(old_eps) >= order: + old_eps.pop(0) + + nonzero_mask = paddle.cast((t != 0), 'float32').reshape([-1, *([1] * (len(x.shape) - 1))]) + sample = mean_pred * nonzero_mask + out["pred_xstart"] * (1 - nonzero_mask) + + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"], "old_eps": old_eps} + + def plms_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Generate samples from the model using Pseudo Linear Multistep. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.plms_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + ): + final = sample + return final["sample"] + + def plms_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Use PLMS to sample from the model and yield intermediate samples from each + timestep of PLMS. + + Same usage as p_sample_loop_progressive(). + """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. 
+ from tqdm.auto import tqdm + + indices = tqdm(indices) + + old_out = None + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + out = self.plms_sample( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + old_out=old_out, + ) + yield out + old_out = out + img = out["sample"] + + def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None): + """ + Get a term for the variational lower-bound. + + The resulting units are bits (rather than nats, as one might expect). + This allows for comparison to other papers. + + :return: a dict with the following keys: + - 'output': a shape [N] tensor of NLLs or KLs. + - 'pred_xstart': the x_0 predictions. + """ + true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t) + out = self.p_mean_variance(model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs) + kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]) + kl = mean_flat(kl) / np.log(2.0) + + decoder_nll = -discretized_gaussian_log_likelihood( + x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]) + assert decoder_nll.shape == x_start.shape + decoder_nll = mean_flat(decoder_nll) / np.log(2.0) + + # At the first timestep return the decoder NLL, + # otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)) + output = paddle.where((t == 0), decoder_nll, kl) + return {"output": output, "pred_xstart": out["pred_xstart"]} + + def training_losses(self, model, x_start, t, model_kwargs=None, noise=None): + """ + Compute training losses for a single timestep. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param t: a batch of timestep indices. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param noise: if specified, the specific Gaussian noise to try to remove. + :return: a dict with the key "loss" containing a tensor of shape [N]. + Some mean or variance settings may also have other keys. + """ + if model_kwargs is None: + model_kwargs = {} + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start, t, noise=noise) + + terms = {} + + if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL: + terms["loss"] = self._vb_terms_bpd( + model=model, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + model_kwargs=model_kwargs, + )["output"] + if self.loss_type == LossType.RESCALED_KL: + terms["loss"] *= self.num_timesteps + elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: + model_output = model(x_t, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ + ModelVarType.LEARNED, + ModelVarType.LEARNED_RANGE, + ]: + B, C = x_t.shape[:2] + assert model_output.shape == (B, C * 2, *x_t.shape[2:]) + model_output, model_var_values = paddle.split(model_output, 2, dim=1) + # Learn the variance using the variational bound, but don't let + # it affect our mean prediction. 
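+                # Detaching model_output in frozen_out below freezes the mean branch, so the
+                # variational-bound term only trains the predicted variance (model_var_values).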
+ frozen_out = paddle.concat([model_output.detach(), model_var_values], axis=1) + terms["vb"] = self._vb_terms_bpd( + model=lambda *args, r=frozen_out: r, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + )["output"] + if self.loss_type == LossType.RESCALED_MSE: + # Divide by 1000 for equivalence with initial implementation. + # Without a factor of 1/1000, the VB term hurts the MSE term. + terms["vb"] *= self.num_timesteps / 1000.0 + + target = { + ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0], + ModelMeanType.START_X: x_start, + ModelMeanType.EPSILON: noise, + }[self.model_mean_type] + assert model_output.shape == target.shape == x_start.shape + terms["mse"] = mean_flat((target - model_output)**2) + if "vb" in terms: + terms["loss"] = terms["mse"] + terms["vb"] + else: + terms["loss"] = terms["mse"] + else: + raise NotImplementedError(self.loss_type) + + return terms + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + + This term can't be optimized, as it only depends on the encoder. + + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = paddle.to_tensor([self.num_timesteps - 1] * batch_size, place=x_start.place) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None): + """ + Compute the entire variational lower-bound, measured in bits-per-dim, + as well as other related quantities. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param clip_denoised: if True, clip denoised samples. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + + :return: a dict containing the following keys: + - total_bpd: the total variational lower-bound, per batch element. + - prior_bpd: the prior term in the lower-bound. + - vb: an [N x T] tensor of terms in the lower-bound. + - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep. + - mse: an [N x T] tensor of epsilon MSEs for each timestep. 
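+                 Note: total_bpd is vb summed over all timesteps plus prior_bpd.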
+ """ + device = x_start.place + batch_size = x_start.shape[0] + + vb = [] + xstart_mse = [] + mse = [] + for t in list(range(self.num_timesteps))[::-1]: + t_batch = paddle.to_tensor([t] * batch_size, place=device) + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise) + # Calculate VLB term at the current timestep + # with paddle.no_grad(): + out = self._vb_terms_bpd( + model, + x_start=x_start, + x_t=x_t, + t=t_batch, + clip_denoised=clip_denoised, + model_kwargs=model_kwargs, + ) + vb.append(out["output"]) + xstart_mse.append(mean_flat((out["pred_xstart"] - x_start)**2)) + eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"]) + mse.append(mean_flat((eps - noise)**2)) + + vb = paddle.stack(vb, axis=1) + xstart_mse = paddle.stack(xstart_mse, axis=1) + mse = paddle.stack(mse, axis=1) + + prior_bpd = self._prior_bpd(x_start) + total_bpd = vb.sum(axis=1) + prior_bpd + return { + "total_bpd": total_bpd, + "prior_bpd": prior_bpd, + "vb": vb, + "xstart_mse": xstart_mse, + "mse": mse, + } + + +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + res = paddle.to_tensor(arr, place=timesteps.place)[timesteps] + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/losses.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/losses.py new file mode 100755 index 000000000..5c3970de5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/losses.py @@ -0,0 +1,86 @@ +""" +Helpers for various likelihood-based losses implemented by Paddle. These are ported from the original +Ho et al. diffusion models codebase: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py +""" +import numpy as np +import paddle +import paddle.nn.functional as F + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + Compute the KL divergence between two gaussians. + + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, paddle.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for th.exp(). + logvar1, logvar2 = [x if isinstance(x, paddle.Tensor) else paddle.to_tensor(x) for x in (logvar1, logvar2)] + + return 0.5 * (-1.0 + logvar2 - logvar1 + paddle.exp(logvar1 - logvar2) + + ((mean1 - mean2)**2) * paddle.exp(-logvar2)) + + +def approx_standard_normal_cdf(x): + """ + A fast approximation of the cumulative distribution function of the + standard normal. 
+ """ + return 0.5 * (1.0 + paddle.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def discretized_gaussian_log_likelihood(x, *, means, log_scales): + """ + Compute the log-likelihood of a Gaussian distribution discretizing to a + given image. + + :param x: the target images. It is assumed that this was uint8 values, + rescaled to the range [-1, 1]. + :param means: the Gaussian mean Tensor. + :param log_scales: the Gaussian log stddev Tensor. + :return: a tensor like x of log probabilities (in nats). + """ + assert x.shape == means.shape == log_scales.shape + centered_x = x - means + inv_stdv = paddle.exp(-log_scales) + plus_in = inv_stdv * (centered_x + 1.0 / 255.0) + cdf_plus = approx_standard_normal_cdf(plus_in) + min_in = inv_stdv * (centered_x - 1.0 / 255.0) + cdf_min = approx_standard_normal_cdf(min_in) + log_cdf_plus = paddle.log(cdf_plus.clip(min=1e-12)) + log_one_minus_cdf_min = paddle.log((1.0 - cdf_min).clip(min=1e-12)) + cdf_delta = cdf_plus - cdf_min + log_probs = paddle.where( + x < -0.999, + log_cdf_plus, + paddle.where(x > 0.999, log_one_minus_cdf_min, paddle.log(cdf_delta.clip(min=1e-12))), + ) + assert log_probs.shape == x.shape + return log_probs + + +def spherical_dist_loss(x, y): + x = F.normalize(x, axis=-1) + y = F.normalize(y, axis=-1) + return (x - y).norm(axis=-1).divide(paddle.to_tensor(2.0)).asin().pow(2).multiply(paddle.to_tensor(2.0)) + + +def tv_loss(input): + """L2 total variation loss, as in Mahendran et al.""" + input = F.pad(input, (0, 1, 0, 1), 'replicate') + x_diff = input[..., :-1, 1:] - input[..., :-1, :-1] + y_diff = input[..., 1:, :-1] - input[..., :-1, :-1] + return (x_diff**2 + y_diff**2).mean([1, 2, 3]) + + +def range_loss(input): + return (input - input.clip(-1, 1)).pow(2).mean([1, 2, 3]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/make_cutouts.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/make_cutouts.py new file mode 100755 index 000000000..392c7877e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/make_cutouts.py @@ -0,0 +1,177 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/nn/make_cutouts.py +''' +import math + +import paddle +import paddle.nn as nn +from disco_diffusion_clip_rn50.resize_right.resize_right import resize +from paddle.nn import functional as F + +from . 
import transforms as T + +skip_augs = False # @param{type: 'boolean'} + + +def sinc(x): + return paddle.where(x != 0, paddle.sin(math.pi * x) / (math.pi * x), x.new_ones([])) + + +def lanczos(x, a): + cond = paddle.logical_and(-a < x, x < a) + out = paddle.where(cond, sinc(x) * sinc(x / a), x.new_zeros([])) + return out / out.sum() + + +def ramp(ratio, width): + n = math.ceil(width / ratio + 1) + out = paddle.empty([n]) + cur = 0 + for i in range(out.shape[0]): + out[i] = cur + cur += ratio + return paddle.concat([-out[1:].flip([0]), out])[1:-1] + + +class MakeCutouts(nn.Layer): + + def __init__(self, cut_size, cutn, skip_augs=False): + super().__init__() + self.cut_size = cut_size + self.cutn = cutn + self.skip_augs = skip_augs + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine(degrees=15, translate=(0.1, 0.1)), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomPerspective(distortion_scale=0.4, p=0.7), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.15), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + input = T.Pad(input.shape[2] // 4, fill=0)(input) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + + cutouts = [] + for ch in range(self.cutn): + if ch > self.cutn - self.cutn // 4: + cutout = input.clone() + else: + size = int(max_size * + paddle.zeros(1, ).normal_(mean=0.8, std=0.3).clip(float(self.cut_size / max_size), 1.0)) + offsetx = paddle.randint(0, abs(sideX - size + 1), ()) + offsety = paddle.randint(0, abs(sideY - size + 1), ()) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + + if not self.skip_augs: + cutout = self.augs(cutout) + cutouts.append(resample(cutout, (self.cut_size, self.cut_size))) + del cutout + + cutouts = paddle.concat(cutouts, axis=0) + return cutouts + + +class MakeCutoutsDango(nn.Layer): + + def __init__(self, cut_size, Overview=4, InnerCrop=0, IC_Size_Pow=0.5, IC_Grey_P=0.2): + super().__init__() + self.cut_size = cut_size + self.Overview = Overview + self.InnerCrop = InnerCrop + self.IC_Size_Pow = IC_Size_Pow + self.IC_Grey_P = IC_Grey_P + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine( + degrees=10, + translate=(0.05, 0.05), + interpolation=T.InterpolationMode.BILINEAR, + ), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.1), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + cutouts = [] + gray = T.Grayscale(3) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + output_shape = [1, 3, self.cut_size, self.cut_size] + pad_input = F.pad( + input, + ( + (sideY - max_size) // 2, + (sideY - max_size) // 2, + (sideX - max_size) // 2, + (sideX - max_size) // 2, + ), + **padargs, + ) + cutout = resize(pad_input, out_shape=output_shape) + + if self.Overview > 0: + if self.Overview <= 4: + if self.Overview >= 1: + cutouts.append(cutout) + if self.Overview >= 2: + cutouts.append(gray(cutout)) + if self.Overview >= 3: + cutouts.append(cutout[:, :, :, ::-1]) + if self.Overview == 4: + cutouts.append(gray(cutout[:, :, :, ::-1])) + else: + cutout = resize(pad_input, out_shape=output_shape) + 
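+                # When Overview > 4, the same padded-and-resized overview cutout is
+                # simply repeated Overview times before the optional augmentations.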
for _ in range(self.Overview): + cutouts.append(cutout) + + if self.InnerCrop > 0: + for i in range(self.InnerCrop): + size = int(paddle.rand([1])**self.IC_Size_Pow * (max_size - min_size) + min_size) + offsetx = paddle.randint(0, sideX - size + 1) + offsety = paddle.randint(0, sideY - size + 1) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + if i <= int(self.IC_Grey_P * self.InnerCrop): + cutout = gray(cutout) + cutout = resize(cutout, out_shape=output_shape) + cutouts.append(cutout) + + cutouts = paddle.concat(cutouts) + if skip_augs is not True: + cutouts = self.augs(cutouts) + return cutouts + + +def resample(input, size, align_corners=True): + n, c, h, w = input.shape + dh, dw = size + + input = input.reshape([n * c, 1, h, w]) + + if dh < h: + kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype) + pad_h = (kernel_h.shape[0] - 1) // 2 + input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect') + input = F.conv2d(input, kernel_h[None, None, :, None]) + + if dw < w: + kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype) + pad_w = (kernel_w.shape[0] - 1) // 2 + input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect') + input = F.conv2d(input, kernel_w[None, None, None, :]) + + input = input.reshape([n, c, h, w]) + return F.interpolate(input, size, mode='bicubic', align_corners=align_corners) + + +padargs = {} diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/nn.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/nn.py new file mode 100755 index 000000000..d618183e2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/nn.py @@ -0,0 +1,127 @@ +""" +Various utilities for neural networks implemented by Paddle. This code is rewritten based on: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py +""" +import math + +import paddle +import paddle.nn as nn + + +class SiLU(nn.Layer): + + def forward(self, x): + return x * nn.functional.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + + def forward(self, x): + return super().forward(x) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1D(*args, **kwargs) + elif dims == 2: + return nn.Conv2D(*args, **kwargs) + elif dims == 3: + return nn.Conv3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1D(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2D(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
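+    Typically applied to the last layer of a block so that the block initially
+    contributes nothing (e.g. a residual branch that starts out as an identity).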
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(axis=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = paddle.exp(-math.log(max_period) * paddle.arange(start=0, end=half, dtype=paddle.float32) / half) + args = paddle.cast(timesteps[:, None], 'float32') * freqs[None] + embedding = paddle.concat([paddle.cos(args), paddle.sin(args)], axis=-1) + if dim % 2: + embedding = paddle.concat([embedding, paddle.zeros_like(embedding[:, :1])], axis=-1) + return embedding + + +def checkpoint(func, inputs, params, flag): + """ + This function is disabled. And now just forward. + """ + return func(*inputs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/perlin_noises.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/perlin_noises.py new file mode 100755 index 000000000..6dacb331b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/perlin_noises.py @@ -0,0 +1,78 @@ +''' +Perlin noise implementation by Paddle. 
+This code is rewritten based on: +https://github.com/jina-ai/discoart/blob/main/discoart/nn/perlin_noises.py +''' +import numpy as np +import paddle +import paddle.vision.transforms as TF +from PIL import Image +from PIL import ImageOps + + +def interp(t): + return 3 * t**2 - 2 * t**3 + + +def perlin(width, height, scale=10): + gx, gy = paddle.randn([2, width + 1, height + 1, 1, 1]) + xs = paddle.linspace(0, 1, scale + 1)[:-1, None] + ys = paddle.linspace(0, 1, scale + 1)[None, :-1] + wx = 1 - interp(xs) + wy = 1 - interp(ys) + dots = 0 + dots += wx * wy * (gx[:-1, :-1] * xs + gy[:-1, :-1] * ys) + dots += (1 - wx) * wy * (-gx[1:, :-1] * (1 - xs) + gy[1:, :-1] * ys) + dots += wx * (1 - wy) * (gx[:-1, 1:] * xs - gy[:-1, 1:] * (1 - ys)) + dots += (1 - wx) * (1 - wy) * (-gx[1:, 1:] * (1 - xs) - gy[1:, 1:] * (1 - ys)) + return dots.transpose([0, 2, 1, 3]).reshape([width * scale, height * scale]) + + +def perlin_ms(octaves, width, height, grayscale): + out_array = [0.5] if grayscale else [0.5, 0.5, 0.5] + # out_array = [0.0] if grayscale else [0.0, 0.0, 0.0] + for i in range(1 if grayscale else 3): + scale = 2**len(octaves) + oct_width = width + oct_height = height + for oct in octaves: + p = perlin(oct_width, oct_height, scale) + out_array[i] += p * oct + scale //= 2 + oct_width *= 2 + oct_height *= 2 + return paddle.concat(out_array) + + +def create_perlin_noise(octaves, width, height, grayscale, side_y, side_x): + out = perlin_ms(octaves, width, height, grayscale) + if grayscale: + out = TF.resize(size=(side_y, side_x), img=out.numpy()) + out = np.uint8(out) + out = Image.fromarray(out).convert('RGB') + else: + out = out.reshape([-1, 3, out.shape[0] // 3, out.shape[1]]) + out = out.squeeze().transpose([1, 2, 0]).numpy() + out = TF.resize(size=(side_y, side_x), img=out) + out = out.clip(0, 1) * 255 + out = np.uint8(out) + out = Image.fromarray(out) + + out = ImageOps.autocontrast(out) + return out + + +def regen_perlin(perlin_mode, side_y, side_x, batch_size): + if perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + + init = (TF.to_tensor(init).add(TF.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + return init.expand([batch_size, -1, -1, -1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/respace.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/respace.py new file mode 100755 index 000000000..c001c70d0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/respace.py @@ -0,0 +1,123 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/respace.py +''' +import numpy as np +import paddle + +from .gaussian_diffusion import GaussianDiffusion + + +def space_timesteps(num_timesteps, section_counts): + """ + Create a list of timesteps to use from an original diffusion process, + given the number of timesteps we want 
to take from equally-sized portions + of the original process. + + For example, if there's 300 timesteps and the section counts are [10,15,20] + then the first 100 timesteps are strided to be 10 timesteps, the second 100 + are strided to be 15 timesteps, and the final 100 are strided to be 20. + + If the stride is a string starting with "ddim", then the fixed striding + from the DDIM paper is used, and only one section is allowed. + + :param num_timesteps: the number of diffusion steps in the original + process to divide up. + :param section_counts: either a list of numbers, or a string containing + comma-separated numbers, indicating the step count + per section. As a special case, use "ddimN" where N + is a number of steps to use the striding from the + DDIM paper. + :return: a set of diffusion steps from the original process to use. + """ + if isinstance(section_counts, str): + if section_counts.startswith("ddim"): + desired_count = int(section_counts[len("ddim"):]) + for i in range(1, num_timesteps): + if len(range(0, num_timesteps, i)) == desired_count: + return set(range(0, num_timesteps, i)) + raise ValueError(f"cannot create exactly {num_timesteps} steps with an integer stride") + section_counts = [int(x) for x in section_counts.split(",")] + size_per = num_timesteps // len(section_counts) + extra = num_timesteps % len(section_counts) + start_idx = 0 + all_steps = [] + for i, section_count in enumerate(section_counts): + size = size_per + (1 if i < extra else 0) + if size < section_count: + raise ValueError(f"cannot divide section of {size} steps into {section_count}") + if section_count <= 1: + frac_stride = 1 + else: + frac_stride = (size - 1) / (section_count - 1) + cur_idx = 0.0 + taken_steps = [] + for _ in range(section_count): + taken_steps.append(start_idx + round(cur_idx)) + cur_idx += frac_stride + all_steps += taken_steps + start_idx += size + return set(all_steps) + + +class SpacedDiffusion(GaussianDiffusion): + """ + A diffusion process which can skip steps in a base diffusion process. + + :param use_timesteps: a collection (sequence or set) of timesteps from the + original diffusion process to retain. + :param kwargs: the kwargs to create the base diffusion process. 
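+    Illustrative usage (assuming `betas` has 1000 entries):
+        SpacedDiffusion(use_timesteps=space_timesteps(1000, "ddim50"), betas=betas, ...)
+    keeps 50 evenly spaced timesteps and remaps them through `timestep_map`.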
+ """ + + def __init__(self, use_timesteps, **kwargs): + self.use_timesteps = set(use_timesteps) + self.timestep_map = [] + self.original_num_steps = len(kwargs["betas"]) + + base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa + last_alpha_cumprod = 1.0 + new_betas = [] + for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): + if i in self.use_timesteps: + new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) + last_alpha_cumprod = alpha_cumprod + self.timestep_map.append(i) + kwargs["betas"] = np.array(new_betas) + super().__init__(**kwargs) + + def p_mean_variance(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) + + def training_losses(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().training_losses(self._wrap_model(model), *args, **kwargs) + + def condition_mean(self, cond_fn, *args, **kwargs): + return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) + + def condition_score(self, cond_fn, *args, **kwargs): + return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) + + def _wrap_model(self, model): + if isinstance(model, _WrappedModel): + return model + return _WrappedModel(model, self.timestep_map, self.rescale_timesteps, self.original_num_steps) + + def _scale_timesteps(self, t): + # Scaling is done by the wrapped model. + return t + + +class _WrappedModel: + + def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): + self.model = model + self.timestep_map = timestep_map + self.rescale_timesteps = rescale_timesteps + self.original_num_steps = original_num_steps + + def __call__(self, x, ts, **kwargs): + map_tensor = paddle.to_tensor(self.timestep_map, place=ts.place, dtype=ts.dtype) + new_ts = map_tensor[ts] + if self.rescale_timesteps: + new_ts = paddle.cast(new_ts, 'float32') * (1000.0 / self.original_num_steps) + return self.model(x, new_ts, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/script_util.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/script_util.py new file mode 100755 index 000000000..d728a5430 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/script_util.py @@ -0,0 +1,201 @@ +''' +This code is based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/script_util.py +''' +import argparse +import inspect + +from . import gaussian_diffusion as gd +from .respace import space_timesteps +from .respace import SpacedDiffusion +from .unet import EncoderUNetModel +from .unet import SuperResModel +from .unet import UNetModel + +NUM_CLASSES = 1000 + + +def diffusion_defaults(): + """ + Defaults for image and classifier training. + """ + return dict( + learn_sigma=False, + diffusion_steps=1000, + noise_schedule="linear", + timestep_respacing="", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + ) + + +def model_and_diffusion_defaults(): + """ + Defaults for image training. 
+ """ + res = dict( + image_size=64, + num_channels=128, + num_res_blocks=2, + num_heads=4, + num_heads_upsample=-1, + num_head_channels=-1, + attention_resolutions="16,8", + channel_mult="", + dropout=0.0, + class_cond=False, + use_checkpoint=False, + use_scale_shift_norm=True, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, + ) + res.update(diffusion_defaults()) + return res + + +def create_model_and_diffusion( + image_size, + class_cond, + learn_sigma, + num_channels, + num_res_blocks, + channel_mult, + num_heads, + num_head_channels, + num_heads_upsample, + attention_resolutions, + dropout, + diffusion_steps, + noise_schedule, + timestep_respacing, + use_kl, + predict_xstart, + rescale_timesteps, + rescale_learned_sigmas, + use_checkpoint, + use_scale_shift_norm, + resblock_updown, + use_fp16, + use_new_attention_order, +): + model = create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult=channel_mult, + learn_sigma=learn_sigma, + class_cond=class_cond, + use_checkpoint=use_checkpoint, + attention_resolutions=attention_resolutions, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + resblock_updown=resblock_updown, + use_fp16=use_fp16, + use_new_attention_order=use_new_attention_order, + ) + diffusion = create_gaussian_diffusion( + steps=diffusion_steps, + learn_sigma=learn_sigma, + noise_schedule=noise_schedule, + use_kl=use_kl, + predict_xstart=predict_xstart, + rescale_timesteps=rescale_timesteps, + rescale_learned_sigmas=rescale_learned_sigmas, + timestep_respacing=timestep_respacing, + ) + return model, diffusion + + +def create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult="", + learn_sigma=False, + class_cond=False, + use_checkpoint=False, + attention_resolutions="16", + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + dropout=0, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, +): + if channel_mult == "": + if image_size == 512: + channel_mult = (0.5, 1, 1, 2, 2, 4, 4) + elif image_size == 256: + channel_mult = (1, 1, 2, 2, 4, 4) + elif image_size == 128: + channel_mult = (1, 1, 2, 3, 4) + elif image_size == 64: + channel_mult = (1, 2, 3, 4) + else: + raise ValueError(f"unsupported image size: {image_size}") + else: + channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) + + attention_ds = [] + for res in attention_resolutions.split(","): + attention_ds.append(image_size // int(res)) + + return UNetModel( + image_size=image_size, + in_channels=3, + model_channels=num_channels, + out_channels=(3 if not learn_sigma else 6), + num_res_blocks=num_res_blocks, + attention_resolutions=tuple(attention_ds), + dropout=dropout, + channel_mult=channel_mult, + num_classes=(NUM_CLASSES if class_cond else None), + use_checkpoint=use_checkpoint, + use_fp16=use_fp16, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + resblock_updown=resblock_updown, + use_new_attention_order=use_new_attention_order, + ) + + +def create_gaussian_diffusion( + *, + steps=1000, + learn_sigma=False, + sigma_small=False, + noise_schedule="linear", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + timestep_respacing="", +): + betas = gd.get_named_beta_schedule(noise_schedule, steps) + if 
use_kl: + loss_type = gd.LossType.RESCALED_KL + elif rescale_learned_sigmas: + loss_type = gd.LossType.RESCALED_MSE + else: + loss_type = gd.LossType.MSE + if not timestep_respacing: + timestep_respacing = [steps] + return SpacedDiffusion( + use_timesteps=space_timesteps(steps, timestep_respacing), + betas=betas, + model_mean_type=(gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X), + model_var_type=((gd.ModelVarType.FIXED_LARGE if not sigma_small else gd.ModelVarType.FIXED_SMALL) + if not learn_sigma else gd.ModelVarType.LEARNED_RANGE), + loss_type=loss_type, + rescale_timesteps=rescale_timesteps, + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/sec_diff.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/sec_diff.py new file mode 100755 index 000000000..1e361f18f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/sec_diff.py @@ -0,0 +1,135 @@ +''' +This code is rewritten by Paddle based on +https://github.com/jina-ai/discoart/blob/main/discoart/nn/sec_diff.py +''' +import math +from dataclasses import dataclass +from functools import partial + +import paddle +import paddle.nn as nn + + +@dataclass +class DiffusionOutput: + v: paddle.Tensor + pred: paddle.Tensor + eps: paddle.Tensor + + +class SkipBlock(nn.Layer): + + def __init__(self, main, skip=None): + super().__init__() + self.main = nn.Sequential(*main) + self.skip = skip if skip else nn.Identity() + + def forward(self, input): + return paddle.concat([self.main(input), self.skip(input)], axis=1) + + +def append_dims(x, n): + return x[(Ellipsis, *(None, ) * (n - x.ndim))] + + +def expand_to_planes(x, shape): + return paddle.tile(append_dims(x, len(shape)), [1, 1, *shape[2:]]) + + +def alpha_sigma_to_t(alpha, sigma): + return paddle.atan2(sigma, alpha) * 2 / math.pi + + +def t_to_alpha_sigma(t): + return paddle.cos(t * math.pi / 2), paddle.sin(t * math.pi / 2) + + +class SecondaryDiffusionImageNet2(nn.Layer): + + def __init__(self): + super().__init__() + c = 64 # The base channel count + cs = [c, c * 2, c * 2, c * 4, c * 4, c * 8] + + self.timestep_embed = FourierFeatures(1, 16) + self.down = nn.AvgPool2D(2) + self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + + self.net = nn.Sequential( + ConvBlock(3 + 16, cs[0]), + ConvBlock(cs[0], cs[0]), + SkipBlock([ + self.down, + ConvBlock(cs[0], cs[1]), + ConvBlock(cs[1], cs[1]), + SkipBlock([ + self.down, + ConvBlock(cs[1], cs[2]), + ConvBlock(cs[2], cs[2]), + SkipBlock([ + self.down, + ConvBlock(cs[2], cs[3]), + ConvBlock(cs[3], cs[3]), + SkipBlock([ + self.down, + ConvBlock(cs[3], cs[4]), + ConvBlock(cs[4], cs[4]), + SkipBlock([ + self.down, + ConvBlock(cs[4], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[4]), + self.up, + ]), + ConvBlock(cs[4] * 2, cs[4]), + ConvBlock(cs[4], cs[3]), + self.up, + ]), + ConvBlock(cs[3] * 2, cs[3]), + ConvBlock(cs[3], cs[2]), + self.up, + ]), + ConvBlock(cs[2] * 2, cs[2]), + ConvBlock(cs[2], cs[1]), + self.up, + ]), + ConvBlock(cs[1] * 2, cs[1]), + ConvBlock(cs[1], cs[0]), + self.up, + ]), + ConvBlock(cs[0] * 2, cs[0]), + nn.Conv2D(cs[0], 3, 3, padding=1), + ) + + def forward(self, input, t): + timestep_embed = expand_to_planes(self.timestep_embed(t[:, None]), input.shape) + v = self.net(paddle.concat([input, timestep_embed], axis=1)) + alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t)) + pred = input * alphas - v * sigmas 
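+        # Together with `pred` above, this inverts the v-parameterization:
+        # pred = alpha * x - sigma * v and eps = sigma * x + alpha * v,
+        # where alpha = cos(t * pi / 2) and sigma = sin(t * pi / 2).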
+ eps = input * sigmas + v * alphas + return DiffusionOutput(v, pred, eps) + + +class FourierFeatures(nn.Layer): + + def __init__(self, in_features, out_features, std=1.0): + super().__init__() + assert out_features % 2 == 0 + # self.weight = nn.Parameter(paddle.randn([out_features // 2, in_features]) * std) + self.weight = paddle.create_parameter([out_features // 2, in_features], + dtype='float32', + default_initializer=nn.initializer.Normal(mean=0.0, std=std)) + + def forward(self, input): + f = 2 * math.pi * input @ self.weight.T + return paddle.concat([f.cos(), f.sin()], axis=-1) + + +class ConvBlock(nn.Sequential): + + def __init__(self, c_in, c_out): + super().__init__( + nn.Conv2D(c_in, c_out, 3, padding=1), + nn.ReLU(), + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/transforms.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/transforms.py new file mode 100755 index 000000000..e0b620b01 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/transforms.py @@ -0,0 +1,757 @@ +''' +This code is rewritten by Paddle based on +https://github.com/pytorch/vision/blob/main/torchvision/transforms/transforms.py +''' +import math +import numbers +import warnings +from enum import Enum +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn.functional import grid_sample +from paddle.vision import transforms as T + + +class Normalize(nn.Layer): + + def __init__(self, mean, std): + super(Normalize, self).__init__() + self.mean = paddle.to_tensor(mean) + self.std = paddle.to_tensor(std) + + def forward(self, tensor: Tensor): + dtype = tensor.dtype + mean = paddle.to_tensor(self.mean, dtype=dtype) + std = paddle.to_tensor(self.std, dtype=dtype) + mean = mean.reshape([1, -1, 1, 1]) + std = std.reshape([1, -1, 1, 1]) + result = tensor.subtract(mean).divide(std) + return result + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. 
+ """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +class Grayscale(nn.Layer): + + def __init__(self, num_output_channels): + super(Grayscale, self).__init__() + self.num_output_channels = num_output_channels + + def forward(self, x): + output = (0.2989 * x[:, 0:1, :, :] + 0.587 * x[:, 1:2, :, :] + 0.114 * x[:, 2:3, :, :]) + if self.num_output_channels == 3: + return output.expand(x.shape) + + return output + + +class Lambda(nn.Layer): + + def __init__(self, func): + super(Lambda, self).__init__() + self.transform = func + + def forward(self, x): + return self.transform(x) + + +class RandomGrayscale(nn.Layer): + + def __init__(self, p): + super(RandomGrayscale, self).__init__() + self.prob = p + self.transform = Grayscale(3) + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return self.transform(x) + else: + return x + + +class RandomHorizontalFlip(nn.Layer): + + def __init__(self, prob): + super(RandomHorizontalFlip, self).__init__() + self.prob = prob + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return x[:, :, :, ::-1] + else: + return x + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 + return (ratio * img1 + (1.0 - ratio) * img2).clip(0, bound) + + +def trunc_div(a, b): + ipt = paddle.divide(a, b) + sign_ipt = paddle.sign(ipt) + abs_ipt = paddle.abs(ipt) + abs_ipt = paddle.floor(abs_ipt) + out = paddle.multiply(sign_ipt, abs_ipt) + return out + + +def fmod(a, b): + return a - trunc_div(a, b) * b + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(axis=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = paddle.max(img, axis=-3) + minc = paddle.min(img, axis=-3) + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = paddle.ones_like(maxc) + s = cr / paddle.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. 
+ cr_divisor = paddle.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r).cast('float32') * (bc - gc) + hg = ((maxc == g) & (maxc != r)).cast('float32') * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)).cast('float32') * (4.0 + gc - rc) + h = hr + hg + hb + h = fmod((h / 6.0 + 1.0), paddle.to_tensor(1.0)) + return paddle.stack((h, s, maxc), axis=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(axis=-3) + i = paddle.floor(h * 6.0) + f = (h * 6.0) - i + i = i.cast(dtype='int32') + + p = paddle.clip((v * (1.0 - s)), 0.0, 1.0) + q = paddle.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = paddle.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(axis=-3) == paddle.arange(6).reshape([-1, 1, 1]) + + a1 = paddle.stack((v, q, p, p, t, v), axis=-3) + a2 = paddle.stack((t, v, v, q, p, p), axis=-3) + a3 = paddle.stack((p, p, t, v, v, q), axis=-3) + a4 = paddle.stack((a1, a2, a3), axis=-4) + + return paddle.einsum("...ijk, ...xijk -> ...xjk", mask.cast(dtype=img.dtype), a4) + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + return _blend(img, paddle.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + c = img.shape[1] + + if c == 3: + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + mean = paddle.mean(output, axis=(-3, -2, -1), keepdim=True) + + else: + mean = paddle.mean(img, axis=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + img = _rgb2hsv(img) + h, s, v = img.unbind(axis=-3) + h = fmod(h + hue_factor, paddle.to_tensor(1.0)) + img = paddle.stack((h, s, v), axis=-3) + img_hue_adj = _hsv2rgb(img) + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + + return _blend(img, output, saturation_factor) + + +class ColorJitter(nn.Layer): + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super(ColorJitter, self).__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be 
a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = paddle.randperm(4) + + b = None if brightness is None else paddle.empty([1]).uniform_(brightness[0], brightness[1]) + c = None if contrast is None else paddle.empty([1]).uniform_(contrast[0], contrast[1]) + s = None if saturation is None else paddle.empty([1]).uniform_(saturation[0], saturation[1]) + h = None if hue is None else paddle.empty([1]).uniform_(hue[0], hue[1]) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
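+            Note: the brightness, contrast, saturation and hue adjustments are
+            applied in the random order sampled by get_params().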
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = adjust_hue(img, hue_factor) + + return img + + def __repr__(self) -> str: + s = (f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})") + return s + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand([img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + img = paddle.concat((img, dummy), axis=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = paddle.to_tensor(fill, dtype=img.dtype).reshape([1, len_fill, 1, 1]).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = paddle.empty([1, oh, ow, 3], dtype=theta.dtype) + x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, num=ow) + base_grid[..., 0] = (x_grid) + y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, num=oh).unsqueeze_(-1) + base_grid[..., 1] = (y_grid) + base_grid[..., 2] = 1.0 + rescaled_theta = theta.transpose([0, 2, 1]) / paddle.to_tensor([0.5 * w, 0.5 * h], dtype=theta.dtype) + output_grid = base_grid.reshape([1, oh * ow, 3]).bmm(rescaled_theta) + return output_grid.reshape([1, oh, ow, 2]) + + +def affine_impl(img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + fill: Optional[List[float]] = None) -> Tensor: + theta = paddle.to_tensor(matrix, dtype=img.dtype).reshape([1, 2, 3]) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_inverse_affine_matrix(center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], + inverted: bool = True) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # Pillow 
requires inverse affine transformation matrix: + # Affine matrix is : M = T * C * RotateScaleShear * C^-1 + # + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RotateScaleShear is rotation with scale and shear matrix + # + # RotateScaleShear(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + if inverted: + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + else: + matrix = [a, b, 0.0, c, d, 0.0] + matrix = [x * scale for x in matrix] + # Apply inverse of center translation: RSS * C^-1 + matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) + matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) + # Apply translation and center : T * C * RSS * C^-1 + matrix[2] += cx + tx + matrix[5] += cy + ty + + return matrix + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, + center: Optional[List[int]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + + Returns: + PIL Image or Tensor: Transformed image. + """ + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + center_f = [0.0, 0.0] + if center is not None: + _, height, width = img.shape[0], img.shape[1], img.shape[2] + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
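+        # e.g. center = (256, 256) on a 512x512 image gives center_f = [0.0, 0.0].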
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) + return affine_impl(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +def _check_sequence_input(x, name, req_sizes): + msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + if not isinstance(x, Sequence): + raise TypeError(f"{name} should be a sequence of length {msg}.") + if len(x) not in req_sizes: + raise ValueError(f"{name} should be sequence of length {msg}.") + + +def _setup_angle(x, name, req_sizes=(2, )): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError(f"If {name} is a single number, it must be positive.") + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x] + + +class RandomAffine(nn.Layer): + """Random affine transformation of the image keeping center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. 
warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + center=None, + ): + super(RandomAffine, self).__init__() + if resample is not None: + warnings.warn("The parameter 'resample' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'interpolation' instead.") + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2, )) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2, )) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2, )) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2, )) + + self.center = center + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(paddle.empty([1]).uniform_(float(degrees[0]), float(degrees[1]))) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(float(paddle.empty([1]).uniform_(-max_dx, max_dx))) + ty = int(float(paddle.empty([1]).uniform_(-max_dy, max_dy))) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(paddle.empty([1]).uniform_(scale_ranges[0], scale_ranges[1])) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(paddle.empty([1]).uniform_(shears[0], shears[1])) + if len(shears) == 4: + shear_y = float(paddle.empty([1]).uniform_(shears[2], shears[3])) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + fill = self.fill + channels, height, width = img.shape[1], 
img.shape[2], img.shape[3] + if isinstance(fill, (int, float)): + fill = [float(fill)] * channels + else: + fill = [float(f) for f in fill] + + img_size = [width, height] # flip for keeping BC on get_params call + + ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) + + return affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center) + + def __repr__(self) -> str: + s = f"{self.__class__.__name__}(degrees={self.degrees}" + s += f", translate={self.translate}" if self.translate is not None else "" + s += f", scale={self.scale}" if self.scale is not None else "" + s += f", shear={self.shear}" if self.shear is not None else "" + s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else "" + s += f", fill={self.fill}" if self.fill != 0 else "" + s += f", center={self.center}" if self.center is not None else "" + s += ")" + + return s diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/unet.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/unet.py new file mode 100755 index 000000000..56f3ad61e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/model/unet.py @@ -0,0 +1,838 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py +''' +import math +from abc import abstractmethod + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from .nn import avg_pool_nd +from .nn import checkpoint +from .nn import conv_nd +from .nn import linear +from .nn import normalization +from .nn import SiLU +from .nn import timestep_embedding +from .nn import zero_module + + +class AttentionPool2d(nn.Layer): + """ + Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py + """ + + def __init__( + self, + spacial_dim: int, + embed_dim: int, + num_heads_channels: int, + output_dim: int = None, + ): + super().__init__() + # self.positional_embedding = nn.Parameter( + # th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5 + # ) + positional_embedding = self.create_parameter(paddle.randn(embed_dim, spacial_dim**2 + 1) / embed_dim**0.5) + self.add_parameter("positional_embedding", positional_embedding) + self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1) + self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1) + self.num_heads = embed_dim // num_heads_channels + self.attention = QKVAttention(self.num_heads) + + def forward(self, x): + b, c, *_spatial = x.shape + # x = x.reshape(b, c, -1) # NC(HW) + x = paddle.reshape(x, [b, c, -1]) + x = paddle.concat([x.mean(dim=-1, keepdim=True), x], axis=-1) # NC(HW+1) + x = x + paddle.cast(self.positional_embedding[None, :, :], x.dtype) # NC(HW+1) + x = self.qkv_proj(x) + x = self.attention(x) + x = self.c_proj(x) + return x[:, :, 0] + + +class TimestepBlock(nn.Layer): + """ + Any module where forward() takes timestep embeddings as a second argument. + """ + + @abstractmethod + def forward(self, x, emb): + """ + Apply the module to `x` given `emb` timestep embeddings. + """ + + +class TimestepEmbedSequential(nn.Sequential, TimestepBlock): + """ + A sequential module that passes timestep embeddings to the children that + support it as an extra input. 
+ """ + + def forward(self, x, emb): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + else: + x = layer(x) + return x + + +class Upsample(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + emb_out = paddle.cast(emb_out, h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = paddle.chunk(emb_out, 2, axis=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x, ), self.parameters(), self.use_checkpoint) + + def _forward(self, x): + b, c, *spatial = x.shape + # x = x.reshape(b, c, -1) + x = paddle.reshape(x, [b, c, -1]) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + # return (x + h).reshape(b, c, *spatial) + return paddle.reshape(x + h, [b, c, *spatial]) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += paddle.to_tensor([matmul_ops], dtype='float64') + + +class QKVAttentionLegacy(nn.Layer): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + # q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + q, k, v = paddle.reshape(qkv, [bs * self.n_heads, ch * 3, length]).split(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Layer): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Layer): + """ + The full UNet model with attention and timestep embedding. + + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + ch = input_ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.LayerList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=int(model_channels * mult), + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(model_channels * mult) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + SiLU(), + zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)), + ) + + def forward(self, x, timesteps, y=None): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == (self.num_classes + is not None), "must specify y if and only if the model is class-conditional" + + hs = [] + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + if self.num_classes is not None: + assert y.shape == (x.shape[0], ) + emb = emb + self.label_emb(y) + + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + hs.append(h) + h = self.middle_block(h, emb) + for module in self.output_blocks: + h = paddle.concat([h, hs.pop()], axis=1) + h = module(h, emb) + # h = paddle.cast(h, x.dtype) + return self.out(h) + + +class SuperResModel(UNetModel): + """ + A UNetModel that performs super-resolution. + + Expects an extra kwarg `low_res` to condition on a low-resolution image. + """ + + def __init__(self, image_size, in_channels, *args, **kwargs): + super().__init__(image_size, in_channels * 2, *args, **kwargs) + + def forward(self, x, timesteps, low_res=None, **kwargs): + _, _, new_height, new_width = x.shape + upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear") + x = paddle.concat([x, upsampled], axis=1) + return super().forward(x, timesteps, **kwargs) + + +class EncoderUNetModel(nn.Layer): + """ + The half UNet model with attention and timestep embedding. + + For usage, see UNet. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + SiLU(), + nn.AdaptiveAvgPool2D((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + SiLU(), + AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + 
nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + # h = x.type(self.dtype) + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + # results.append(h.type(x.dtype).mean(axis=(2, 3))) + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = paddle.concat(results, axis=-1) + return self.out(h) + else: + # h = h.type(x.dtype) + h = paddle.cast(h, x.dtype) + return self.out(h) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/default.yml b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/default.yml new file mode 100755 index 000000000..97c3c1b98 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/default.yml @@ -0,0 +1,47 @@ +text_prompts: + - A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation. + +init_image: + +width_height: [ 1280, 768] + +skip_steps: 10 +steps: 250 + +cut_ic_pow: 1 +init_scale: 1000 +clip_guidance_scale: 5000 + +tv_scale: 0 +range_scale: 150 +sat_scale: 0 +cutn_batches: 4 + +diffusion_model: 512x512_diffusion_uncond_finetune_008100 +use_secondary_model: True +diffusion_sampling_mode: ddim + +perlin_init: False +perlin_mode: mixed +seed: 445467575 +eta: 0.8 +clamp_grad: True +clamp_max: 0.05 + +randomize_class: True +clip_denoised: False +fuzzy_prompt: False +rand_mag: 0.05 + +cut_overview: "[12]*400+[4]*600" +cut_innercut: "[4]*400+[12]*600" +cut_icgray_p: "[0.2]*400+[0]*600" + +display_rate: 10 +n_batches: 1 +batch_size: 1 +batch_name: '' +clip_models: + - VIT + - RN50 + - RN101 diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/docstrings.yml b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/docstrings.yml new file mode 100755 index 000000000..702015e1c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/resources/docstrings.yml @@ -0,0 +1,103 @@ +text_prompts: | + Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. 
+ Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. +init_image: | + Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. + If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. +width_height: | + Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + +skip_steps: | + Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps. + As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases. + The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times. + If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily. + Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems. + Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. + However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + +steps: | + When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step. + Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. 
Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. + Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + +cut_ic_pow: | + This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +init_scale: | + This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. +clip_guidance_scale: | + CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. + Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. + Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. +tv_scale: | + Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising +range_scale: | + Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + +sat_scale: | + Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. +cutn_batches: | + Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. + Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. + At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. 
+ However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image. + So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + +diffusion_model: Diffusion_model of choice. + +use_secondary_model: | + Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + +diffusion_sampling_mode: | + Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + +perlin_init: | + Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). + Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + +perlin_mode: | + sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. +seed: | + Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. + After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. 
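+# Added worked example (not part of the original Disco Diffusion docstrings): how the
+# cutn_batches entry above combines with the cut schedules in default.yml.
+#   cut_overview = "[12]*400+[4]*600" and cut_innercut = "[4]*400+[12]*600", so an early
+#   timestep (one of the first 400 entries of the 1000-entry schedule) is scheduled for
+#   12 + 4 = 16 cuts; with cutn_batches: 4 that gives
+#   (scheduled cuts) x (cutn_batches) = 16 x 4 = 64 cuts in that timestep, evaluated as
+#   4 sequential batches of 16, so peak memory stays at the 16-cut level while render
+#   time grows roughly 4x.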
+eta: |
+  eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results.
+  The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects.
+clamp_grad: |
+  As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced.
+clamp_max: |
+  Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy.
+
+randomize_class: |
+  Controls whether the ImageNet class fed to the class-conditional diffusion model is re-randomized at each timestep.
+clip_denoised: |
+  Controls whether the sampler clamps the model’s predicted denoised image to the valid [-1, 1] range at each timestep.
+fuzzy_prompt: |
+  Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this.
+rand_mag: |
+  Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.
+
+cut_overview: The schedule of overview cuts
+cut_innercut: The schedule of inner cuts
+cut_icgray_p: The schedule of the fraction of inner cuts that are converted to grayscale before CLIP evaluation.
+
+display_rate: |
+  During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly.
+n_batches: |
+  This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings.
+batch_name: |
+  The name of the batch; the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks being overwritten by other users, please use a unique name.
+clip_models: |
+  CLIP Model selectors. ViT-B/32, ViT-B/16, ViT-L/14, RN101, RN50, RN50x4, RN50x16, RN50x64.
+  These various CLIP models are available for you to use during image generation. Models have different styles or ‘flavors,’ so look around.
+  You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.
+ The rough order of speed/mem usage is (smallest/fastest to largest/slowest): + ViT-B/32 + RN50 + RN101 + ViT-B/16 + RN50x4 + RN50x16 + RN50x64 + ViT-L/14 + For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/runner.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/runner.py new file mode 100755 index 000000000..9645d93cf --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/reverse_diffusion/runner.py @@ -0,0 +1,285 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/runner.py +''' +import gc +import os +import random +from threading import Thread + +import disco_diffusion_clip_rn50.clip.clip as clip +import numpy as np +import paddle +import paddle.vision.transforms as T +import paddle_lpips as lpips +from docarray import Document +from docarray import DocumentArray +from IPython import display +from ipywidgets import Output +from PIL import Image + +from .helper import logger +from .helper import parse_prompt +from .model.losses import range_loss +from .model.losses import spherical_dist_loss +from .model.losses import tv_loss +from .model.make_cutouts import MakeCutoutsDango +from .model.sec_diff import alpha_sigma_to_t +from .model.sec_diff import SecondaryDiffusionImageNet2 +from .model.transforms import Normalize + + +def do_run(args, models) -> 'DocumentArray': + logger.info('preparing models...') + model, diffusion, clip_models, secondary_model = models + normalize = Normalize( + mean=[0.48145466, 0.4578275, 0.40821073], + std=[0.26862954, 0.26130258, 0.27577711], + ) + lpips_model = lpips.LPIPS(net='vgg') + for parameter in lpips_model.parameters(): + parameter.stop_gradient = True + side_x = (args.width_height[0] // 64) * 64 + side_y = (args.width_height[1] // 64) * 64 + cut_overview = eval(args.cut_overview) + cut_innercut = eval(args.cut_innercut) + cut_icgray_p = eval(args.cut_icgray_p) + + from .model.perlin_noises import create_perlin_noise, regen_perlin + + seed = args.seed + + skip_steps = args.skip_steps + + loss_values = [] + + if seed is not None: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + model_stats = [] + for clip_model in clip_models: + model_stat = { + 'clip_model': None, + 'target_embeds': [], + 'make_cutouts': None, + 'weights': [], + } + model_stat['clip_model'] = clip_model + + if isinstance(args.text_prompts, str): + args.text_prompts = [args.text_prompts] + + for prompt in args.text_prompts: + txt, weight = parse_prompt(prompt) + txt = clip_model.encode_text(clip.tokenize(prompt)) + if args.fuzzy_prompt: + for i in range(25): + model_stat['target_embeds'].append((txt + paddle.randn(txt.shape) * args.rand_mag).clip(0, 1)) + model_stat['weights'].append(weight) + else: + model_stat['target_embeds'].append(txt) + model_stat['weights'].append(weight) + + model_stat['target_embeds'] = paddle.concat(model_stat['target_embeds']) + model_stat['weights'] = paddle.to_tensor(model_stat['weights']) + if model_stat['weights'].sum().abs() < 1e-3: + raise RuntimeError('The weights must not sum to 0.') + model_stat['weights'] /= model_stat['weights'].sum().abs() + model_stats.append(model_stat) + + init = None + if args.init_image: + d = Document(uri=args.init_image).load_uri_to_image_tensor(side_x, side_y) + init = T.to_tensor(d.tensor).unsqueeze(0) * 2 - 1 + + if args.perlin_init: + if args.perlin_mode == 'color': + init 
= create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif args.perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + init = (T.to_tensor(init).add(T.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + + cur_t = None + + def cond_fn(x, t, y=None): + x_is_NaN = False + n = x.shape[0] + if secondary_model: + alpha = paddle.to_tensor(diffusion.sqrt_alphas_cumprod[cur_t], dtype='float32') + sigma = paddle.to_tensor(diffusion.sqrt_one_minus_alphas_cumprod[cur_t], dtype='float32') + cosine_t = alpha_sigma_to_t(alpha, sigma) + x = paddle.to_tensor(x.detach(), dtype='float32') + x.stop_gradient = False + cosine_t = paddle.tile(paddle.to_tensor(cosine_t.detach().cpu().numpy()), [n]) + cosine_t.stop_gradient = False + out = secondary_model(x, cosine_t).pred + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + else: + t = paddle.ones([n], dtype='int64') * cur_t + out = diffusion.p_mean_variance(model, x, t, clip_denoised=False, model_kwargs={'y': y}) + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out['pred_xstart'] * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + for model_stat in model_stats: + for i in range(args.cutn_batches): + t_int = (int(t.item()) + 1) # errors on last step without +1, need to find source + # when using SLIP Base model the dimensions need to be hard coded to avoid AttributeError: 'VisionTransformer' object has no attribute 'input_resolution' + try: + input_resolution = model_stat['clip_model'].visual.input_resolution + except: + input_resolution = 224 + + cuts = MakeCutoutsDango( + input_resolution, + Overview=cut_overview[1000 - t_int], + InnerCrop=cut_innercut[1000 - t_int], + IC_Size_Pow=args.cut_ic_pow, + IC_Grey_P=cut_icgray_p[1000 - t_int], + ) + clip_in = normalize(cuts(x_in.add(paddle.to_tensor(1.0)).divide(paddle.to_tensor(2.0)))) + image_embeds = (model_stat['clip_model'].encode_image(clip_in)) + + dists = spherical_dist_loss( + image_embeds.unsqueeze(1), + model_stat['target_embeds'].unsqueeze(0), + ) + + dists = dists.reshape([ + cut_overview[1000 - t_int] + cut_innercut[1000 - t_int], + n, + -1, + ]) + losses = dists.multiply(model_stat['weights']).sum(2).mean(0) + loss_values.append(losses.sum().item()) # log loss, probably shouldn't do per cutn_batch + + x_in_grad += (paddle.grad(losses.sum() * args.clip_guidance_scale, x_in)[0] / args.cutn_batches) + tv_losses = tv_loss(x_in) + range_losses = range_loss(x_in) + sat_losses = paddle.abs(x_in - x_in.clip(min=-1, max=1)).mean() + loss = (tv_losses.sum() * args.tv_scale + range_losses.sum() * args.range_scale + + sat_losses.sum() * args.sat_scale) + if init is not None and args.init_scale: + init_losses = lpips_model(x_in, init) + loss = loss + init_losses.sum() * args.init_scale + x_in_grad += paddle.grad(loss, x_in)[0] + if not paddle.isnan(x_in_grad).any(): + 
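+            # At this point x_in_grad holds the gradient of all guidance losses (CLIP similarity,
+            # TV, range, saturation and the optional LPIPS init loss) with respect to x_in, the
+            # predicted clean image. The line below back-propagates it through x_in_d to the noisy
+            # sample x (a vector-Jacobian product) and flips the sign, because cond_fn must return
+            # a direction of ascent while the losses above are being minimized.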
grad = -paddle.grad(x_in_d, x, x_in_grad)[0] + else: + x_is_NaN = True + grad = paddle.zeros_like(x) + if args.clamp_grad and not x_is_NaN: + magnitude = grad.square().mean().sqrt() + return (grad * magnitude.clip(max=args.clamp_max) / magnitude) + return grad + + if args.diffusion_sampling_mode == 'ddim': + sample_fn = diffusion.ddim_sample_loop_progressive + else: + sample_fn = diffusion.plms_sample_loop_progressive + + logger.info('creating artwork...') + + image_display = Output() + da_batches = DocumentArray() + + for _nb in range(args.n_batches): + display.clear_output(wait=True) + display.display(args.name_docarray, image_display) + gc.collect() + paddle.device.cuda.empty_cache() + + d = Document(tags=vars(args)) + da_batches.append(d) + + cur_t = diffusion.num_timesteps - skip_steps - 1 + + if args.perlin_init: + init = regen_perlin(args.perlin_mode, side_y, side_x, args.batch_size) + + if args.diffusion_sampling_mode == 'ddim': + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + eta=args.eta, + ) + else: + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + order=2, + ) + + threads = [] + for j, sample in enumerate(samples): + cur_t -= 1 + with image_display: + if j % args.display_rate == 0 or cur_t == -1: + for _, image in enumerate(sample['pred_xstart']): + image = (image + 1) / 2 + image = image.clip(0, 1).squeeze().transpose([1, 2, 0]).numpy() * 255 + image = np.uint8(image) + image = Image.fromarray(image) + + image.save(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb))) + c = Document(tags={'cur_t': cur_t}) + c.load_pil_image_to_datauri(image) + d.chunks.append(c) + display.clear_output(wait=True) + display.display(display.Image(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb)))) + d.chunks.plot_image_sprites(os.path.join(args.output_dir, + f'{args.name_docarray}-progress-{_nb}.png'), + show_index=True) + t = Thread( + target=_silent_push, + args=( + da_batches, + args.name_docarray, + ), + ) + threads.append(t) + t.start() + + if cur_t == -1: + d.load_pil_image_to_datauri(image) + + for t in threads: + t.join() + display.clear_output(wait=True) + logger.info(f'done! 
{args.name_docarray}') + da_batches.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + return da_batches + + +def _silent_push(da_batches: DocumentArray, name: str) -> None: + try: + da_batches.push(name) + except Exception as ex: + logger.debug(f'push failed: {ex}') From aeebde7562da044c8c768ea613b4bbcd3ce93b5b Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 19 Aug 2022 13:20:49 +0800 Subject: [PATCH 033/117] add disco_diffusion_clip_rn101 module --- .../disco_diffusion_clip_rn101/README.md | 114 ++ .../disco_diffusion_clip_rn101/clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../clip/clip/model.py | 227 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../clip/clip/utils.py | 122 ++ .../disco_diffusion_clip_rn101/module.py | 441 ++++++ .../requirements.txt | 8 + .../resize_right/README.md | 3 + .../resize_right/__init__.py | 0 .../resize_right/interp_methods.py | 70 + .../resize_right/resize_right.py | 403 ++++++ .../reverse_diffusion/README.md | 2 + .../reverse_diffusion/__init__.py | 156 +++ .../reverse_diffusion/config.py | 77 ++ .../reverse_diffusion/helper.py | 137 ++ .../reverse_diffusion/model/__init__.py | 3 + .../model/gaussian_diffusion.py | 1214 +++++++++++++++++ .../reverse_diffusion/model/losses.py | 86 ++ .../reverse_diffusion/model/make_cutouts.py | 177 +++ .../reverse_diffusion/model/nn.py | 127 ++ .../reverse_diffusion/model/perlin_noises.py | 78 ++ .../reverse_diffusion/model/respace.py | 123 ++ .../reverse_diffusion/model/script_util.py | 201 +++ .../reverse_diffusion/model/sec_diff.py | 135 ++ .../reverse_diffusion/model/transforms.py | 757 ++++++++++ .../reverse_diffusion/model/unet.py | 838 ++++++++++++ .../reverse_diffusion/resources/default.yml | 47 + .../resources/docstrings.yml | 103 ++ .../reverse_diffusion/runner.py | 285 ++++ 31 files changed, 6254 insertions(+) create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn101/README.md create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/layers.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/model.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/module.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/requirements.txt create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/interp_methods.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/resize_right.py create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/README.md create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/config.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/helper.py create mode 100755 
modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/__init__.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/gaussian_diffusion.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/losses.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/make_cutouts.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/nn.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/perlin_noises.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/respace.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/script_util.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/sec_diff.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/transforms.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/unet.py create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/default.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/docstrings.yml create mode 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/runner.py diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md new file mode 100644 index 000000000..f6b446bc6 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md @@ -0,0 +1,114 @@ +# disco_diffusion_clip_rn101 + +|模型名称|disco_diffusion_clip_rn101| +| :--- | :---: | +|类别|图像-文图生成| +|网络|dd+clip ResNet101| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|2.9GB| +|最新更新日期|2022-08-02| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - 输出图像 +

+ (示例输出图像) + + - 生成过程 + + (示例生成过程动图) +
+ +### 模型介绍 + +disco_diffusion_clip_rn101 是一个文图生成模型,可以通过输入一段文字来生成符合该句子语义的图像。该模型由两部分组成,一部分是扩散模型,是一种生成模型,可以从噪声输入中重建出原始图像。另一部分是多模态预训练模型(CLIP), 可以将文本和图像表示在同一个特征空间,相近语义的文本和图像在该特征空间里距离会更相近。在该文图生成模型中,扩散模型负责从初始噪声或者指定初始图像中来生成目标图像,CLIP负责引导生成图像的语义和输入的文本的语义尽可能接近,随着扩散模型在CLIP的引导下不断的迭代生成新图像,最终能够生成文本所描述内容的图像。该模块中使用的CLIP模型结构为ResNet101。 + +更多详情请参考论文:[Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) 以及 [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install disco_diffusion_clip_rn101 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run disco_diffusion_clip_rn101 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_rn101_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_rn101") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # 生成图像, 默认会在disco_diffusion_clip_rn101_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_rn101_out/') + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_rn101_out-result.png') + # 展示所有的中间结果 + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_rn101_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation."。prompt的构造可以参考[网站](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#)。 + - style(Optional[str]): 指定绘画的风格,如'watercolor','Chinese painting'等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如Greg Rutkowsk、krenz,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"disco_diffusion_clip_rn101_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 
包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install disco_diffusion_clip_rn101 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/README.md new file mode 100644 index 000000000..317214d80 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We copy this repo here for guided diffusion. diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/layers.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + 
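# Delegate to multi_head_attention_forward above: it projects x to Q/K/V, splits
+ # the embedding dim into `num_heads` heads, applies scaled dot-product attention
+ # (adding the optional attn_mask before the softmax), then merges the heads back
+ # and re-projects the result through c_proj.
+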
atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git 
a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/model.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/model.py new file mode 100755 index 000000000..63d1835c5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/model.py @@ -0,0 +1,227 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + 
kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + # print(x.shape) + + x = x + self.positional_embedding + #print(x.shape) + + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized 
features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
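+ For example, get_pairs(('h', 'e', 'l', 'l', 'o')) returns {('h', 'e'), ('e', 'l'), ('l', 'l'), ('l', 'o')}.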
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '</w>' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '</w>', ) + pairs = get_pairs(word) + + if not pairs: + return token + '</w>' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ') + return text diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py new file mode 100755 index 000000000..53d5c4440 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py @@ -0,0 +1,122 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['RN50', 'RN101', 
'VIT32'] + +URL = { + 'RN50': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN50.pdparams'), + 'RN101': os.path.join(os.path.dirname(__file__), 'pre_trained', 'RN101.pdparams'), + 'VIT32': os.path.join(os.path.dirname(__file__), 'pre_trained', 'ViT-B-32.pdparams') +} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='RN101'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'RN101': build_rn101_model, 'VIT32': build_vit_model, 'RN50': build_rn50_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + model.load_dict(sd) + model.eval() + return model + + +def build_vit_model(): + + model = CLIP(embed_dim=512, + image_resolution=224, + vision_layers=12, + vision_width=768, + vision_patch_size=32, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn101_model(): + model = CLIP( + embed_dim=512, + image_resolution=224, + vision_layers=(3, 4, 23, 3), + vision_width=64, + vision_patch_size=0, #Not used in resnet + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model + + +def build_rn50_model(): + model = CLIP(embed_dim=1024, + image_resolution=224, + vision_layers=(3, 4, 6, 3), + vision_width=64, + vision_patch_size=None, + context_length=77, + vocab_size=49408, + transformer_width=512, + transformer_heads=8, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py new file mode 100755 index 000000000..c59b2f5ff --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py @@ -0,0 +1,441 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import ast +import os +import sys +from functools import partial +from typing import List +from typing import Optional + +import disco_diffusion_clip_rn101.clip as clip +import disco_diffusion_clip_rn101.resize_right as resize_right +import paddle +from disco_diffusion_clip_rn101.reverse_diffusion import create + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="disco_diffusion_clip_rn101", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class DiscoDiffusionClip: + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 0, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 0, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 1, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + use_gpu: Optional[bool] = True, + output_dir: Optional[str] = 'disco_diffusion_clip_rn101_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. 
These other apps use different technologies, but many of the same principles apply. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. 
However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. 
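(With the default schedules cut_overview='[12]*400+[4]*600' and cut_innercut='[4]*400+[12]*600', the scheduled cuts work out to 12 overview plus 4 inner cuts early in the run and 4 plus 12 later, i.e. 16 per timestep, so cutn_batches=4 yields 16 x 4 = 64.)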
Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. 
Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param use_gpu: whether to use gpu or not. + :return: a DocumentArray object that has `n_batches` Documents + """ + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + elif isinstance(text_prompts, list): + text_prompts[0] = text_prompts[0].rstrip(',.,。') + if style is not None: + text_prompts[0] += ",{}".format(style) + if artist is not None: + text_prompts[0] += ",{},trending on artstation".format(artist) + + return create(text_prompts=text_prompts, + init_image=init_image, + width_height=width_height, + skip_steps=skip_steps, + steps=steps, + cut_ic_pow=cut_ic_pow, + init_scale=init_scale, + clip_guidance_scale=clip_guidance_scale, + tv_scale=tv_scale, + range_scale=range_scale, + sat_scale=sat_scale, + cutn_batches=cutn_batches, + diffusion_sampling_mode=diffusion_sampling_mode, + perlin_init=perlin_init, + perlin_mode=perlin_mode, + seed=seed, + eta=eta, + clamp_grad=clamp_grad, + clamp_max=clamp_max, + randomize_class=randomize_class, + clip_denoised=clip_denoised, + fuzzy_prompt=fuzzy_prompt, + rand_mag=rand_mag, + cut_overview=cut_overview, + cut_innercut=cut_innercut, + cut_icgray_p=cut_icgray_p, + display_rate=display_rate, + n_batches=n_batches, + batch_size=batch_size, + batch_name=batch_name, + clip_models=['RN101'], + output_dir=output_dir) + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = [] + for text_prompt in text_prompts: + result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() + results.append(result) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + init_image=args.init_image, + width_height=args.width_height, + skip_steps=args.skip_steps, + steps=args.steps, + cut_ic_pow=args.cut_ic_pow, + init_scale=args.init_scale, + clip_guidance_scale=args.clip_guidance_scale, + tv_scale=args.tv_scale, + range_scale=args.range_scale, + sat_scale=args.sat_scale, + cutn_batches=args.cutn_batches, + diffusion_sampling_mode=args.diffusion_sampling_mode, + perlin_init=args.perlin_init, + perlin_mode=args.perlin_mode, + seed=args.seed, + eta=args.eta, + clamp_grad=args.clamp_grad, + clamp_max=args.clamp_max, + randomize_class=args.randomize_class, + clip_denoised=args.clip_denoised, + fuzzy_prompt=args.fuzzy_prompt, + rand_mag=args.rand_mag, + cut_overview=args.cut_overview, + cut_innercut=args.cut_innercut, + cut_icgray_p=args.cut_icgray_p, + display_rate=args.display_rate, + n_batches=args.n_batches, + batch_size=args.batch_size, + batch_name=args.batch_name, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
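+ Each option maps to the generate_image keyword argument of the same name, e.g. hub run disco_diffusion_clip_rn101 --text_prompts "a lighthouse painting" --steps 150 --output_dir out.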
+ """ + self.arg_input_group.add_argument( + '--skip_steps', + type=int, + default=0, + help= + 'Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15%% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50%% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture' + ) + self.arg_input_group.add_argument( + '--steps', + type=int, + default=250, + help= + "When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time." + ) + self.arg_input_group.add_argument( + '--cut_ic_pow', + type=int, + default=1, + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--init_scale', + type=int, + default=1000, + help= + "This controls how strongly CLIP will try to match the init_image provided. 
This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost." + ) + self.arg_input_group.add_argument( + '--clip_guidance_scale', + type=int, + default=5000, + help= + "CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well." + ) + self.arg_input_group.add_argument( + '--tv_scale', + type=int, + default=0, + help= + "Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising" + ) + self.arg_input_group.add_argument( + '--range_scale', + type=int, + default=0, + help= + "Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images." + ) + self.arg_input_group.add_argument( + '--sat_scale', + type=int, + default=0, + help= + "Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation." + ) + self.arg_input_group.add_argument( + '--cutn_batches', + type=int, + default=4, + help= + "Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. 
DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below." + ) + self.arg_input_group.add_argument( + '--diffusion_sampling_mode', + type=str, + default='ddim', + help= + "Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord." + ) + self.arg_input_group.add_argument( + '--perlin_init', + type=bool, + default=False, + help= + "Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively." + ) + self.arg_input_group.add_argument( + '--perlin_mode', + type=str, + default='mixed', + help= + "sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects." + ) + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical." + ) + self.arg_input_group.add_argument( + '--eta', + type=float, + default=0.8, + help= + "eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects." 
+ ) + self.arg_input_group.add_argument( + '--clamp_grad', + type=bool, + default=True, + help= + "As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced." + ) + self.arg_input_group.add_argument( + '--clamp_max', + type=float, + default=0.05, + help= + "Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy." + ) + self.arg_input_group.add_argument('--randomize_class', type=bool, default=True, help="Random class.") + self.arg_input_group.add_argument('--clip_denoised', type=bool, default=False, help="Clip denoised.") + self.arg_input_group.add_argument( + '--fuzzy_prompt', + type=bool, + default=False, + help= + "Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this." + ) + self.arg_input_group.add_argument( + '--rand_mag', + type=float, + default=0.5, + help="Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt.") + self.arg_input_group.add_argument('--cut_overview', + type=str, + default='[12]*400+[4]*600', + help="The schedule of overview cuts") + self.arg_input_group.add_argument('--cut_innercut', + type=str, + default='[4]*400+[12]*600', + help="The schedule of inner cuts") + self.arg_input_group.add_argument( + '--cut_icgray_p', + type=str, + default='[0.2]*400+[0]*600', + help= + "This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details." + ) + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help= + "During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly." + ) + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='disco_diffusion_clip_rn101_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument( + '--init_image', + type=str, + default=None, + help= + "Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion." + ) + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[1280, 768], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--n_batches', + type=int, + default=1, + help= + "This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings." + ) + self.arg_input_group.add_argument('--batch_size', type=int, default=1, help="Batch size.") + self.arg_input_group.add_argument( + '--batch_name', + type=str, + default='', + help= + 'The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name.' 
+ ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/requirements.txt b/modules/image/text_to_image/disco_diffusion_clip_rn101/requirements.txt new file mode 100755 index 000000000..8b4bc0ea4 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/requirements.txt @@ -0,0 +1,8 @@ +numpy +paddle_lpips==0.1.2 +ftfy +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/README.md new file mode 100644 index 000000000..1f8d0bb0a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/README.md @@ -0,0 +1,3 @@ +# ResizeRight (Paddle) +Fully differentiable resize function implemented by Paddle. +This module is based on [assafshocher/ResizeRight](https://github.com/assafshocher/ResizeRight). diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/__init__.py new file mode 100755 index 000000000..e69de29bb diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/interp_methods.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/interp_methods.py new file mode 100755 index 000000000..276eb055a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/interp_methods.py @@ -0,0 +1,70 @@ +from math import pi + +try: + import paddle +except ImportError: + paddle = None + +try: + import numpy + import numpy as np +except ImportError: + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or PyTorch but both not found") + + +def set_framework_dependencies(x): + if type(x) is numpy.ndarray: + to_dtype = lambda a: a + fw = numpy + else: + to_dtype = lambda a: paddle.cast(a, x.dtype) + fw = paddle + # eps = fw.finfo(fw.float32).eps + eps = paddle.to_tensor(np.finfo(np.float32).eps) + return fw, to_dtype, eps + + +def support_sz(sz): + + def wrapper(f): + f.support_sz = sz + return f + + return wrapper + + +@support_sz(4) +def cubic(x): + fw, to_dtype, eps = set_framework_dependencies(x) + absx = fw.abs(x) + absx2 = absx**2 + absx3 = absx**3 + return ((1.5 * absx3 - 2.5 * absx2 + 1.) * to_dtype(absx <= 1.) + + (-0.5 * absx3 + 2.5 * absx2 - 4. * absx + 2.) * to_dtype((1. 
< absx) & (absx <= 2.))) + + +@support_sz(4) +def lanczos2(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 2) + eps) / ((pi**2 * x**2 / 2) + eps)) * to_dtype(abs(x) < 2)) + + +@support_sz(6) +def lanczos3(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return (((fw.sin(pi * x) * fw.sin(pi * x / 3) + eps) / ((pi**2 * x**2 / 3) + eps)) * to_dtype(abs(x) < 3)) + + +@support_sz(2) +def linear(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return ((x + 1) * to_dtype((-1 <= x) & (x < 0)) + (1 - x) * to_dtype((0 <= x) & (x <= 1))) + + +@support_sz(1) +def box(x): + fw, to_dtype, eps = set_framework_dependencies(x) + return to_dtype((-1 <= x) & (x < 0)) + to_dtype((0 <= x) & (x <= 1)) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/resize_right.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/resize_right.py new file mode 100755 index 000000000..6a92c828c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/resize_right/resize_right.py @@ -0,0 +1,403 @@ +import warnings +from fractions import Fraction +from math import ceil +from typing import Tuple + +import disco_diffusion_clip_rn101.resize_right.interp_methods as interp_methods + + +class NoneClass: + pass + + +try: + import paddle + from paddle import nn + nnModuleWrapped = nn.Layer +except ImportError: + warnings.warn('No Paddle found, will work only with Numpy') + paddle = None + nnModuleWrapped = NoneClass + +try: + import numpy + import numpy as np +except ImportError: + warnings.warn('No Numpy found, will work only with Paddle') + numpy = None + +if numpy is None and paddle is None: + raise ImportError("Must have either Numpy or Paddle but neither was found") + + +def resize(input, + scale_factors=None, + out_shape=None, + interp_method=interp_methods.cubic, + support_sz=None, + antialiasing=True, + by_convs=False, + scale_tolerance=None, + max_numerator=10, + pad_mode='constant'): + # get properties of the input tensor + in_shape, n_dims = input.shape, input.ndim + + # fw stands for framework that can be either numpy or paddle, + # determined by the input type + fw = numpy if type(input) is numpy.ndarray else paddle + eps = np.finfo(np.float32).eps if fw == numpy else paddle.to_tensor(np.finfo(np.float32).eps) + device = input.place if fw is paddle else None + + # set missing scale factors or output shape, one according to another, + # scream if both missing. this is also where all the defaults policies + # take place. also handling the by_convs attribute carefully. + scale_factors, out_shape, by_convs = set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, + scale_tolerance, max_numerator, eps, fw) + + # sort indices of dimensions according to scale of each dimension. + # since we are going dim by dim this is efficient + sorted_filtered_dims_and_scales = [(dim, scale_factors[dim], by_convs[dim], in_shape[dim], out_shape[dim]) + for dim in sorted(range(n_dims), key=lambda ind: scale_factors[ind]) + if scale_factors[dim] != 1.]
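A minimal usage sketch for the resize entry point defined above. It assumes the module is installed so the import path used in this file resolves, and a Paddle build matching the rest of the module (the helpers call paddle.device.cuda.empty_cache(), so a CUDA-enabled install is the expected environment). For Paddle tensors, scale factors given for fewer dims than the input apply to the trailing dims, so an NCHW image is resized in H and W.

```python
import paddle
from disco_diffusion_clip_rn101.resize_right.resize_right import resize

img = paddle.rand([1, 3, 64, 48])          # NCHW tensor
half = resize(img, scale_factors=0.5)      # antialiased cubic downscale of H and W
exact = resize(img, out_shape=(96, 72))    # leading dims not listed in out_shape are kept
print(half.shape, exact.shape)             # [1, 3, 32, 24] [1, 3, 96, 72]
```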
+ # unless support size is specified by the user, it is an attribute + # of the interpolation method + if support_sz is None: + support_sz = interp_method.support_sz + + # output begins identical to input and changes with each iteration + output = input + + # iterate over dims + for (dim, scale_factor, dim_by_convs, in_sz, out_sz) in sorted_filtered_dims_and_scales: + # STEP 1- PROJECTED GRID: The non-integer locations of the projection + # of output pixel locations to the input tensor + projected_grid = get_projected_grid(in_sz, out_sz, scale_factor, fw, dim_by_convs, device) + + # STEP 1.5: ANTIALIASING- If antialiasing is taking place, we modify + # the window size and the interpolation method (see inside function) + cur_interp_method, cur_support_sz = apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, + antialiasing) + + # STEP 2- FIELDS OF VIEW: for each output pixels, map the input pixels + # that influence it. Also calculate needed padding and update grid + # accoedingly + field_of_view = get_field_of_view(projected_grid, cur_support_sz, fw, eps, device) + + # STEP 2.5- CALCULATE PAD AND UPDATE: according to the field of view, + # the input should be padded to handle the boundaries, coordinates + # should be updated. actual padding only occurs when weights are + # aplied (step 4). if using by_convs for this dim, then we need to + # calc right and left boundaries for each filter instead. + pad_sz, projected_grid, field_of_view = calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, + dim_by_convs, fw, device) + # STEP 3- CALCULATE WEIGHTS: Match a set of weights to the pixels in + # the field of view for each output pixel + weights = get_weights(cur_interp_method, projected_grid, field_of_view) + + # STEP 4- APPLY WEIGHTS: Each output pixel is calculated by multiplying + # its set of weights with the pixel values in its field of view. + # We now multiply the fields of view with their matching weights. + # We do this by tensor multiplication and broadcasting. + # if by_convs is true for this dim, then we do this action by + # convolutions. this is equivalent but faster. + if not dim_by_convs: + output = apply_weights(output, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw) + else: + output = apply_convs(output, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw) + return output + + +def get_projected_grid(in_sz, out_sz, scale_factor, fw, by_convs, device=None): + # we start by having the ouput coordinates which are just integer locations + # in the special case when usin by_convs, we only need two cycles of grid + # points. the first and last. + grid_sz = out_sz if not by_convs else scale_factor.numerator + out_coordinates = fw_arange(grid_sz, fw, device) + + # This is projecting the ouput pixel locations in 1d to the input tensor, + # as non-integer locations. + # the following fomrula is derived in the paper + # "From Discrete to Continuous Convolutions" by Shocher et al. + return (out_coordinates / float(scale_factor) + (in_sz - 1) / 2 - (out_sz - 1) / (2 * float(scale_factor))) + + +def get_field_of_view(projected_grid, cur_support_sz, fw, eps, device): + # for each output pixel, map which input pixels influence it, in 1d. 
+ # we start by calculating the leftmost neighbor, using half of the window + # size (eps is for when boundary is exact int) + left_boundaries = fw_ceil(projected_grid - cur_support_sz / 2 - eps, fw) + + # then we simply take all the pixel centers in the field by counting + # window size pixels from the left boundary + ordinal_numbers = fw_arange(ceil(cur_support_sz - eps), fw, device) + return left_boundaries[:, None] + ordinal_numbers + + +def calc_pad_sz(in_sz, out_sz, field_of_view, projected_grid, scale_factor, dim_by_convs, fw, device): + if not dim_by_convs: + # determine padding according to neighbor coords out of bound. + # this is a generalized notion of padding, when pad<0 it means crop + pad_sz = [-field_of_view[0, 0].item(), field_of_view[-1, -1].item() - in_sz + 1] + + # since input image will be changed by padding, coordinates of both + # field_of_view and projected_grid need to be updated + field_of_view += pad_sz[0] + projected_grid += pad_sz[0] + + else: + # only used for by_convs, to calc the boundaries of each filter the + # number of distinct convolutions is the numerator of the scale factor + num_convs, stride = scale_factor.numerator, scale_factor.denominator + + # calculate left and right boundaries for each conv. left can also be + # negative right can be bigger than in_sz. such cases imply padding if + # needed. however if# both are in-bounds, it means we need to crop, + # practically apply the conv only on part of the image. + left_pads = -field_of_view[:, 0] + + # next calc is tricky, explanation by rows: + # 1) counting output pixels between the first position of each filter + # to the right boundary of the input + # 2) dividing it by number of filters to count how many 'jumps' + # each filter does + # 3) multiplying by the stride gives us the distance over the input + # coords done by all these jumps for each filter + # 4) to this distance we add the right boundary of the filter when + # placed in its leftmost position. so now we get the right boundary + # of that filter in input coord. + # 5) the padding size needed is obtained by subtracting the rightmost + # input coordinate. if the result is positive padding is needed. if + # negative then negative padding means shaving off pixel columns. + right_pads = (((out_sz - fw_arange(num_convs, fw, device) - 1) # (1) + // num_convs) # (2) + * stride # (3) + + field_of_view[:, -1] # (4) + - in_sz + 1) # (5) + + # in the by_convs case pad_sz is a list of left-right pairs. one per + # each filter + + pad_sz = list(zip(left_pads, right_pads)) + + return pad_sz, projected_grid, field_of_view + + +def get_weights(interp_method, projected_grid, field_of_view): + # the set of weights per each output pixels is the result of the chosen + # interpolation method applied to the distances between projected grid + # locations and the pixel-centers in the field of view (distances are + # directed, can be positive or negative) + weights = interp_method(projected_grid[:, None] - field_of_view) + + # we now carefully normalize the weights to sum to 1 per each output pixel + sum_weights = weights.sum(1, keepdim=True) + sum_weights[sum_weights == 0] = 1 + return weights / sum_weights + + +def apply_weights(input, field_of_view, weights, dim, n_dims, pad_sz, pad_mode, fw): + # for this operation we assume the resized dim is the first one. 
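To make steps 1-3 concrete, the standalone NumPy sketch below re-derives the projected grid, field of view and normalized weights for a tiny 1-D case (8 pixels down to 4, linear kernel, antialiasing on). It mirrors the formulas in get_projected_grid, get_field_of_view and get_weights above rather than importing them.

```python
# Standalone NumPy re-derivation of steps 1-3 for a 1-D toy case (8 -> 4 pixels).
import numpy as np

in_sz, out_sz, scale = 8, 4, 0.5
support = 2 / scale                                                  # linear support (2) stretched by antialiasing
kernel = lambda x: scale * np.clip(1 - np.abs(scale * x), 0, None)   # stretched, rescaled linear kernel

# step 1: project output pixel centers onto input coordinates
projected = np.arange(out_sz) / scale + (in_sz - 1) / 2 - (out_sz - 1) / (2 * scale)

# step 2: the `support` input pixels around each projected location
eps = np.finfo(np.float32).eps
left = np.ceil(projected - support / 2 - eps).astype(int)
field_of_view = left[:, None] + np.arange(int(np.ceil(support - eps)))

# step 3: kernel weights over the signed distances, normalized per output pixel
weights = kernel(projected[:, None] - field_of_view)
weights /= weights.sum(1, keepdims=True)

print(projected)       # [0.5 2.5 4.5 6.5] -> output centers sit between input pixels
print(field_of_view)   # out-of-range indices (-1, 8) are what the padding step absorbs
print(weights)         # each row sums to 1
```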
+ # so we transpose and will transpose back after multiplying + tmp_input = fw_swapaxes(input, dim, 0, fw) + + # apply padding + tmp_input = fw_pad(tmp_input, fw, pad_sz, pad_mode) + + # field_of_view is a tensor of order 2: for each output (1d location + # along cur dim)- a list of 1d neighbors locations. + # note that this whole operations is applied to each dim separately, + # this is why it is all in 1d. + # neighbors = tmp_input[field_of_view] is a tensor of order image_dims+1: + # for each output pixel (this time indicated in all dims), these are the + # values of the neighbors in the 1d field of view. note that we only + # consider neighbors along the current dim, but such set exists for every + # multi-dim location, hence the final tensor order is image_dims+1. + paddle.device.cuda.empty_cache() + neighbors = tmp_input[field_of_view] + + # weights is an order 2 tensor: for each output location along 1d- a list + # of weights matching the field of view. we augment it with ones, for + # broadcasting, so that when multiplies some tensor the weights affect + # only its first dim. + tmp_weights = fw.reshape(weights, (*weights.shape, *[1] * (n_dims - 1))) + + # now we simply multiply the weights with the neighbors, and then sum + # along the field of view, to get a single value per out pixel + tmp_output = (neighbors * tmp_weights).sum(1) + # we transpose back the resized dim to its original position + return fw_swapaxes(tmp_output, 0, dim, fw) + + +def apply_convs(input, scale_factor, in_sz, out_sz, weights, dim, pad_sz, pad_mode, fw): + # for this operations we assume the resized dim is the last one. + # so we transpose and will transpose back after multiplying + input = fw_swapaxes(input, dim, -1, fw) + + # the stride for all convs is the denominator of the scale factor + stride, num_convs = scale_factor.denominator, scale_factor.numerator + + # prepare an empty tensor for the output + tmp_out_shape = list(input.shape) + tmp_out_shape[-1] = out_sz + tmp_output = fw_empty(tuple(tmp_out_shape), fw, input.device) + + # iterate over the conv operations. we have as many as the numerator + # of the scale-factor. for each we need boundaries and a filter. + for conv_ind, (pad_sz, filt) in enumerate(zip(pad_sz, weights)): + # apply padding (we pad last dim, padding can be negative) + pad_dim = input.ndim - 1 + tmp_input = fw_pad(input, fw, pad_sz, pad_mode, dim=pad_dim) + + # apply convolution over last dim. store in the output tensor with + # positional strides so that when the loop is comlete conv results are + # interwind + tmp_output[..., conv_ind::num_convs] = fw_conv(tmp_input, filt, stride) + + return fw_swapaxes(tmp_output, -1, dim, fw) + + +def set_scale_and_out_sz(in_shape, out_shape, scale_factors, by_convs, scale_tolerance, max_numerator, eps, fw): + # eventually we must have both scale-factors and out-sizes for all in/out + # dims. 
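Continuing the same toy case, step 4 (apply_weights above) is just a gather of each output pixel's neighbors followed by a weighted sum. The standalone sketch below does it in NumPy, using the field of view and weights derived earlier, with one pixel of constant padding to absorb the out-of-range indices.

```python
# Standalone NumPy sketch of step 4 (gather neighbors, weighted sum) for the 1-D toy case.
import numpy as np

signal = np.arange(8, dtype=float)                     # input pixels 0..7
field_of_view = np.array([[-1, 0, 1, 2],
                          [1, 2, 3, 4],
                          [3, 4, 5, 6],
                          [5, 6, 7, 8]])
weights = np.array([[0.125, 0.375, 0.375, 0.125]] * 4)

padded = np.pad(signal, (1, 1), mode='constant')       # one pixel of padding on each side
neighbors = padded[field_of_view + 1]                  # gather each output pixel's window
print((neighbors * weights).sum(axis=1))               # [0.625 2.5 4.5 5.5]; edges feel the zero padding
```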
however, we support many possible partial arguments + if scale_factors is None and out_shape is None: + raise ValueError("either scale_factors or out_shape should be " + "provided") + if out_shape is not None: + # if out_shape has less dims than in_shape, we defaultly resize the + # first dims for numpy and last dims for paddle + out_shape = (list(out_shape) + + list(in_shape[len(out_shape):]) if fw is numpy else list(in_shape[:-len(out_shape)]) + + list(out_shape)) + if scale_factors is None: + # if no scale given, we calculate it as the out to in ratio + # (not recomended) + scale_factors = [out_sz / in_sz for out_sz, in_sz in zip(out_shape, in_shape)] + if scale_factors is not None: + # by default, if a single number is given as scale, we assume resizing + # two dims (most common are images with 2 spatial dims) + scale_factors = (scale_factors if isinstance(scale_factors, (list, tuple)) else [scale_factors, scale_factors]) + # if less scale_factors than in_shape dims, we defaultly resize the + # first dims for numpy and last dims for paddle + scale_factors = (list(scale_factors) + [1] * (len(in_shape) - len(scale_factors)) if fw is numpy else [1] * + (len(in_shape) - len(scale_factors)) + list(scale_factors)) + if out_shape is None: + # when no out_shape given, it is calculated by multiplying the + # scale by the in_shape (not recomended) + out_shape = [ceil(scale_factor * in_sz) for scale_factor, in_sz in zip(scale_factors, in_shape)] + # next part intentionally after out_shape determined for stability + # we fix by_convs to be a list of truth values in case it is not + if not isinstance(by_convs, (list, tuple)): + by_convs = [by_convs] * len(out_shape) + + # next loop fixes the scale for each dim to be either frac or float. + # this is determined by by_convs and by tolerance for scale accuracy. + for ind, (sf, dim_by_convs) in enumerate(zip(scale_factors, by_convs)): + # first we fractionaize + if dim_by_convs: + frac = Fraction(1 / sf).limit_denominator(max_numerator) + frac = Fraction(numerator=frac.denominator, denominator=frac.numerator) + + # if accuracy is within tolerance scale will be frac. if not, then + # it will be float and the by_convs attr will be set false for + # this dim + if scale_tolerance is None: + scale_tolerance = eps + if dim_by_convs and abs(frac - sf) < scale_tolerance: + scale_factors[ind] = frac + else: + scale_factors[ind] = float(sf) + by_convs[ind] = False + + return scale_factors, out_shape, by_convs + + +def apply_antialiasing_if_needed(interp_method, support_sz, scale_factor, antialiasing): + # antialiasing is "stretching" the field of view according to the scale + # factor (only for downscaling). this is low-pass filtering. this + # requires modifying both the interpolation (stretching the 1d + # function and multiplying by the scale-factor) and the window size. 
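When by_convs is requested, set_scale_and_out_sz above snaps every scale factor to a rational number so the resize can later run as strided convolutions: the numerator becomes the number of distinct filters and the denominator the stride. A standalone sketch of that snapping, using the same Fraction calls:

```python
# Standalone sketch of the rational-scale snapping used for the by_convs path.
from fractions import Fraction

sf, max_numerator = 2 / 3, 10
frac = Fraction(1 / sf).limit_denominator(max_numerator)
frac = Fraction(numerator=frac.denominator, denominator=frac.numerator)   # invert back to the scale
print(frac)                                  # 2/3
print(frac.numerator, frac.denominator)      # 2 distinct conv filters, applied with stride 3
print(abs(frac - sf) < 1e-6)                 # within tolerance, so by_convs stays enabled for this dim
```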
+ scale_factor = float(scale_factor) + if scale_factor >= 1.0 or not antialiasing: + return interp_method, support_sz + cur_interp_method = (lambda arg: scale_factor * interp_method(scale_factor * arg)) + cur_support_sz = support_sz / scale_factor + return cur_interp_method, cur_support_sz + + +def fw_ceil(x, fw): + if fw is numpy: + return fw.int_(fw.ceil(x)) + else: + return paddle.cast(x.ceil(), dtype='int64') + + +def fw_floor(x, fw): + if fw is numpy: + return fw.int_(fw.floor(x)) + else: + return paddle.cast(x.floor(), dtype='int64') + + +def fw_cat(x, fw): + if fw is numpy: + return fw.concatenate(x) + else: + return fw.concat(x) + + +def fw_swapaxes(x, ax_1, ax_2, fw): + if fw is numpy: + return fw.swapaxes(x, ax_1, ax_2) + else: + if ax_1 == -1: + ax_1 = len(x.shape) - 1 + if ax_2 == -1: + ax_2 = len(x.shape) - 1 + perm0 = list(range(len(x.shape))) + temp = ax_1 + perm0[temp] = ax_2 + perm0[ax_2] = temp + return fw.transpose(x, perm0) + + +def fw_pad(x, fw, pad_sz, pad_mode, dim=0): + if pad_sz == (0, 0): + return x + if fw is numpy: + pad_vec = [(0, 0)] * x.ndim + pad_vec[dim] = pad_sz + return fw.pad(x, pad_width=pad_vec, mode=pad_mode) + else: + if x.ndim < 3: + x = x[None, None, ...] + + pad_vec = [0] * ((x.ndim - 2) * 2) + pad_vec[0:2] = pad_sz + return fw_swapaxes(fw.nn.functional.pad(fw_swapaxes(x, dim, -1, fw), pad=pad_vec, mode=pad_mode), dim, -1, fw) + + +def fw_conv(input, filter, stride): + # we want to apply 1d conv to any nd array. the way to do it is to reshape + # the input to a 4D tensor. first two dims are singeletons, 3rd dim stores + # all the spatial dims that we are not convolving along now. then we can + # apply conv2d with a 1xK filter. This convolves the same way all the other + # dims stored in the 3d dim. like depthwise conv over these. + # TODO: numpy support + reshaped_input = input.reshape(1, 1, -1, input.shape[-1]) + reshaped_output = paddle.nn.functional.conv2d(reshaped_input, filter.view(1, 1, 1, -1), stride=(1, stride)) + return reshaped_output.reshape(*input.shape[:-1], -1) + + +def fw_arange(upper_bound, fw, device): + if fw is numpy: + return fw.arange(upper_bound) + else: + return fw.arange(upper_bound) + + +def fw_empty(shape, fw, device): + if fw is numpy: + return fw.empty(shape) + else: + return fw.empty(shape=shape) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/README.md new file mode 100644 index 000000000..711671bad --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/README.md @@ -0,0 +1,2 @@ +# Diffusion model (Paddle) +This module implements diffusion model which accepts a text prompt and outputs images semantically close to the text. The code is rewritten by Paddle, and mainly refer to two projects: jina-ai/discoart[https://github.com/jina-ai/discoart] and openai/guided-diffusion[https://github.com/openai/guided-diffusion]. Thanks for their wonderful work. 
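A minimal sketch of how the create() entry point documented in the __init__.py that follows might be invoked, assuming the package and its pretrained weights are installed. The argument names and values simply mirror the defaults in the signature below, and model loading happens when the package is imported.

```python
# Minimal usage sketch of the reverse_diffusion create() API shown below
# (assumes the package and its pretrained weights are installed).
from disco_diffusion_clip_rn101.reverse_diffusion import create

da = create(text_prompts=['A beautiful painting of a singular lighthouse, shining its light '
                          'across a tumultuous sea of blood by greg rutkowski and thomas kinkade, '
                          'Trending on artstation.'],
            width_height=[1280, 768],
            steps=250,
            n_batches=1,
            batch_name='lighthouse',
            output_dir='discoart_output')
if da is not None:
    print(len(da))   # a DocumentArray with `n_batches` Documents
```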
diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/__init__.py new file mode 100755 index 000000000..39fc908dc --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/__init__.py @@ -0,0 +1,156 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/__init__.py +''' +import os +import warnings + +os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' + +__all__ = ['create'] + +import sys + +__resources_path__ = os.path.join( + os.path.dirname(sys.modules.get(__package__).__file__ if __package__ in sys.modules else __file__), + 'resources', +) + +import gc + +# check if GPU is available +import paddle + +# download and load models, this will take some time on the first load + +from .helper import load_all_models, load_diffusion_model, load_clip_models + +model_config, secondary_model = load_all_models('512x512_diffusion_uncond_finetune_008100', use_secondary_model=True) + +from typing import TYPE_CHECKING, overload, List, Optional + +if TYPE_CHECKING: + from docarray import DocumentArray, Document + +_clip_models_cache = {} + +# begin_create_overload + + +@overload +def create(text_prompts: Optional[List[str]] = [ + 'A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation.', + 'yellow color scheme', +], + init_image: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + skip_steps: Optional[int] = 10, + steps: Optional[int] = 250, + cut_ic_pow: Optional[int] = 1, + init_scale: Optional[int] = 1000, + clip_guidance_scale: Optional[int] = 5000, + tv_scale: Optional[int] = 0, + range_scale: Optional[int] = 150, + sat_scale: Optional[int] = 0, + cutn_batches: Optional[int] = 4, + diffusion_model: Optional[str] = '512x512_diffusion_uncond_finetune_008100', + use_secondary_model: Optional[bool] = True, + diffusion_sampling_mode: Optional[str] = 'ddim', + perlin_init: Optional[bool] = False, + perlin_mode: Optional[str] = 'mixed', + seed: Optional[int] = None, + eta: Optional[float] = 0.8, + clamp_grad: Optional[bool] = True, + clamp_max: Optional[float] = 0.05, + randomize_class: Optional[bool] = True, + clip_denoised: Optional[bool] = False, + fuzzy_prompt: Optional[bool] = False, + rand_mag: Optional[float] = 0.05, + cut_overview: Optional[str] = '[12]*400+[4]*600', + cut_innercut: Optional[str] = '[4]*400+[12]*600', + cut_icgray_p: Optional[str] = '[0.2]*400+[0]*600', + display_rate: Optional[int] = 10, + n_batches: Optional[int] = 4, + batch_size: Optional[int] = 1, + batch_name: Optional[str] = '', + clip_models: Optional[list] = ['ViTB32', 'ViTB16', 'RN50'], + output_dir: Optional[str] = 'discoart_output') -> 'DocumentArray': + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param init_image: Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param skip_steps: Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps.As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases.The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times.If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily.Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems.Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + :param steps: When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. 
Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step.Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + :param cut_ic_pow: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param init_scale: This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. + :param clip_guidance_scale: CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. + :param tv_scale: Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising + :param range_scale: Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + :param sat_scale: Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. + :param cutn_batches: Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. 
Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image.So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + :param diffusion_model: Diffusion_model of choice. + :param use_secondary_model: Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + :param diffusion_sampling_mode: Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + :param perlin_init: Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + :param perlin_mode: sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. + :param seed: Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. 
This is useful if you like a particular result and would like to run more iterations that will be similar. After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. + :param eta: eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. + :param clamp_grad: As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. + :param clamp_max: Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + :param fuzzy_prompt: Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. + :param rand_mag: Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + :param cut_overview: The schedule of overview cuts + :param cut_innercut: The schedule of inner cuts + :param cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + :param display_rate: During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. + :param n_batches: This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. + :param batch_name: The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. + :param clip_models: CLIP Model selectors. ViTB32, ViTB16, ViTL14, RN101, RN50, RN50x4, RN50x16, RN50x64.These various CLIP models are available for you to use during image generation. 
Models have different styles or ‘flavors,’ so look around. You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash.The rough order of speed/mem usage is (smallest/fastest to largest/slowest):VitB32RN50RN101VitB16RN50x4RN50x16RN50x64ViTL14For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +# end_create_overload + + +@overload +def create(init_document: 'Document') -> 'DocumentArray': + """ + Create an artwork using a DocArray ``Document`` object as initial state. + :param init_document: its ``.tags`` will be used as parameters, ``.uri`` (if present) will be used as init image. + :return: a DocumentArray object that has `n_batches` Documents + """ + + +def create(**kwargs) -> 'DocumentArray': + from .config import load_config + from .runner import do_run + + if 'init_document' in kwargs: + d = kwargs['init_document'] + _kwargs = d.tags + if not _kwargs: + warnings.warn('init_document has no .tags, fallback to default config') + if d.uri: + _kwargs['init_image'] = kwargs['init_document'].uri + else: + warnings.warn('init_document has no .uri, fallback to no init image') + kwargs.pop('init_document') + if kwargs: + warnings.warn('init_document has .tags and .uri, but kwargs are also present, will override .tags') + _kwargs.update(kwargs) + _args = load_config(user_config=_kwargs) + else: + _args = load_config(user_config=kwargs) + + model, diffusion = load_diffusion_model(model_config, _args.diffusion_model, steps=_args.steps) + + clip_models = load_clip_models(enabled=_args.clip_models, clip_models=_clip_models_cache) + + gc.collect() + paddle.device.cuda.empty_cache() + try: + return do_run(_args, (model, diffusion, clip_models, secondary_model)) + except KeyboardInterrupt: + pass diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/config.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/config.py new file mode 100755 index 000000000..0cbc71e6f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/config.py @@ -0,0 +1,77 @@ +''' +https://github.com/jina-ai/discoart/blob/main/discoart/config.py +''' +import copy +import random +import warnings +from types import SimpleNamespace +from typing import Dict + +import yaml +from yaml import Loader + +from . 
import __resources_path__ + +with open(f'{__resources_path__}/default.yml') as ymlfile: + default_args = yaml.load(ymlfile, Loader=Loader) + + +def load_config(user_config: Dict, ): + cfg = copy.deepcopy(default_args) + + if user_config: + cfg.update(**user_config) + + for k in user_config.keys(): + if k not in cfg: + warnings.warn(f'unknown argument {k}, ignored') + + for k, v in cfg.items(): + if k in ('batch_size', 'display_rate', 'seed', 'skip_steps', 'steps', 'n_batches', + 'cutn_batches') and isinstance(v, float): + cfg[k] = int(v) + if k == 'width_height': + cfg[k] = [int(vv) for vv in v] + + cfg.update(**{ + 'seed': cfg['seed'] or random.randint(0, 2**32), + }) + + if cfg['batch_name']: + da_name = f'{__package__}-{cfg["batch_name"]}-{cfg["seed"]}' + else: + da_name = f'{__package__}-{cfg["seed"]}' + warnings.warn('you did not set `batch_name`, set it to have unique session ID') + + cfg.update(**{'name_docarray': da_name}) + + print_args_table(cfg) + + return SimpleNamespace(**cfg) + + +def print_args_table(cfg): + from rich.table import Table + from rich import box + from rich.console import Console + + console = Console() + + param_str = Table( + title=cfg['name_docarray'], + box=box.ROUNDED, + highlight=True, + title_justify='left', + ) + param_str.add_column('Argument', justify='right') + param_str.add_column('Value', justify='left') + + for k, v in sorted(cfg.items()): + value = str(v) + + if not default_args.get(k, None) == v: + value = f'[b]{value}[/]' + + param_str.add_row(k, value) + + console.print(param_str) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/helper.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/helper.py new file mode 100755 index 000000000..4a1bc6685 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/helper.py @@ -0,0 +1,137 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. 
+https://github.com/jina-ai/discoart/blob/main/discoart/helper.py +''' +import hashlib +import logging +import os +import subprocess +import sys +from os.path import expanduser +from pathlib import Path +from typing import Any +from typing import Dict +from typing import List + +import paddle + + +def _get_logger(): + logger = logging.getLogger(__package__) + logger.setLevel("INFO") + ch = logging.StreamHandler() + ch.setLevel("INFO") + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + logger.addHandler(ch) + return logger + + +logger = _get_logger() + + +def load_clip_models(enabled: List[str], clip_models: Dict[str, Any] = {}): + + import disco_diffusion_clip_rn101.clip.clip as clip + from disco_diffusion_clip_rn101.clip.clip import build_model, tokenize, transform + + # load enabled models + for k in enabled: + if k not in clip_models: + clip_models[k] = build_model(name=k) + clip_models[k].eval() + for parameter in clip_models[k].parameters(): + parameter.stop_gradient = True + + # drop models that are not enabled to save memory (iterate over a copy of + # the keys so the dict can be mutated safely inside the loop) + for k in list(clip_models.keys()): + if k not in enabled: + clip_models.pop(k) + + return list(clip_models.values()) + + +def load_all_models(diffusion_model, use_secondary_model): + from .model.script_util import ( + model_and_diffusion_defaults, ) + + model_config = model_and_diffusion_defaults() + + if diffusion_model == '512x512_diffusion_uncond_finetune_008100': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later. + 'image_size': 512, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + elif diffusion_model == '256x256_diffusion_uncond': + model_config.update({ + 'attention_resolutions': '32, 16, 8', + 'class_cond': False, + 'diffusion_steps': 1000, # No need to edit this, it is taken care of later. + 'rescale_timesteps': True, + 'timestep_respacing': 250, # No need to edit this, it is taken care of later.
+ 'image_size': 256, + 'learn_sigma': True, + 'noise_schedule': 'linear', + 'num_channels': 256, + 'num_head_channels': 64, + 'num_res_blocks': 2, + 'resblock_updown': True, + 'use_fp16': False, + 'use_scale_shift_norm': True, + }) + + secondary_model = None + if use_secondary_model: + from .model.sec_diff import SecondaryDiffusionImageNet2 + secondary_model = SecondaryDiffusionImageNet2() + model_dict = paddle.load( + os.path.join(os.path.dirname(__file__), 'pre_trained', 'secondary_model_imagenet_2.pdparams')) + secondary_model.set_state_dict(model_dict) + secondary_model.eval() + for parameter in secondary_model.parameters(): + parameter.stop_gradient = True + + return model_config, secondary_model + + +def load_diffusion_model(model_config, diffusion_model, steps): + from .model.script_util import ( + create_model_and_diffusion, ) + + timestep_respacing = f'ddim{steps}' + diffusion_steps = (1000 // steps) * steps if steps < 1000 else steps + model_config.update({ + 'timestep_respacing': timestep_respacing, + 'diffusion_steps': diffusion_steps, + }) + + model, diffusion = create_model_and_diffusion(**model_config) + model.set_state_dict( + paddle.load(os.path.join(os.path.dirname(__file__), 'pre_trained', f'{diffusion_model}.pdparams'))) + model.eval() + for name, param in model.named_parameters(): + param.stop_gradient = True + + return model, diffusion + + +def parse_prompt(prompt): + if prompt.startswith('http://') or prompt.startswith('https://'): + vals = prompt.rsplit(':', 2) + vals = [vals[0] + ':' + vals[1], *vals[2:]] + else: + vals = prompt.rsplit(':', 1) + vals = vals + ['', '1'][len(vals):] + return vals[0], float(vals[1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/__init__.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/__init__.py new file mode 100755 index 000000000..466800666 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/__init__.py @@ -0,0 +1,3 @@ +""" +Codebase for "Improved Denoising Diffusion Probabilistic Models" implemented by Paddle. +""" diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/gaussian_diffusion.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/gaussian_diffusion.py new file mode 100755 index 000000000..86cd2c650 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/gaussian_diffusion.py @@ -0,0 +1,1214 @@ +""" +Diffusion model implemented by Paddle. +This code is rewritten based on Pytorch version of of Ho et al's diffusion models: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py +""" +import enum +import math + +import numpy as np +import paddle + +from .losses import discretized_gaussian_log_likelihood +from .losses import normal_kl +from .nn import mean_flat + + +def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): + """ + Get a pre-defined beta schedule for the given name. + + The beta schedule library consists of beta schedules which remain similar + in the limit of num_diffusion_timesteps. + Beta schedules may be added, but should not be removed or changed once + they are committed to maintain backwards compatibility. + """ + if schedule_name == "linear": + # Linear schedule from Ho et al, extended to work for any number of + # diffusion steps. 
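The parse_prompt helper above strips an optional ':<weight>' suffix from a prompt (keeping the colons inside URLs intact) and defaults the weight to 1.0. A few example calls, assuming the helper is importable via the package path used elsewhere in this diff (note that importing the reverse_diffusion package also triggers the model loading in its __init__):

```python
# Behaviour of the parse_prompt helper defined above.
from disco_diffusion_clip_rn101.reverse_diffusion.helper import parse_prompt

print(parse_prompt('a lighthouse at dusk'))              # ('a lighthouse at dusk', 1.0)
print(parse_prompt('a lighthouse at dusk:2'))            # ('a lighthouse at dusk', 2.0)
print(parse_prompt('https://example.com/ref.png:0.5'))   # ('https://example.com/ref.png', 0.5)
```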
+ scale = 1000 / num_diffusion_timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64) + elif schedule_name == "cosine": + return betas_for_alpha_bar( + num_diffusion_timesteps, + lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2)**2, + ) + else: + raise NotImplementedError(f"unknown beta schedule: {schedule_name}") + + +def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, + which defines the cumulative product of (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. + :param alpha_bar: a lambda that takes an argument t from 0 to 1 and + produces the cumulative product of (1-beta) up to that + part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas) + + +class ModelMeanType(enum.Enum): + """ + Which type of output the model predicts. + """ + + PREVIOUS_X = enum.auto() # the model predicts x_{t-1} + START_X = enum.auto() # the model predicts x_0 + EPSILON = enum.auto() # the model predicts epsilon + + +class ModelVarType(enum.Enum): + """ + What is used as the model's output variance. + + The LEARNED_RANGE option has been added to allow the model to predict + values between FIXED_SMALL and FIXED_LARGE, making its job easier. + """ + + LEARNED = enum.auto() + FIXED_SMALL = enum.auto() + FIXED_LARGE = enum.auto() + LEARNED_RANGE = enum.auto() + + +class LossType(enum.Enum): + MSE = enum.auto() # use raw MSE loss (and KL when learning variances) + RESCALED_MSE = (enum.auto()) # use raw MSE loss (with RESCALED_KL when learning variances) + KL = enum.auto() # use the variational lower-bound + RESCALED_KL = enum.auto() # like KL, but rescale to estimate the full VLB + + def is_vb(self): + return self == LossType.KL or self == LossType.RESCALED_KL + + +class GaussianDiffusion: + """ + Utilities for training and sampling diffusion models. + + Ported directly from here, and then adapted over time to further experimentation. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42 + + :param betas: a 1-D numpy array of betas for each diffusion timestep, + starting at T and going to 1. + :param model_mean_type: a ModelMeanType determining what the model outputs. + :param model_var_type: a ModelVarType determining how variance is output. + :param loss_type: a LossType determining the loss function to use. + :param rescale_timesteps: if True, pass floating point timesteps into the + model so that they are always scaled like in the + original paper (0 to 1000). + """ + + def __init__( + self, + *, + betas, + model_mean_type, + model_var_type, + loss_type, + rescale_timesteps=False, + ): + self.model_mean_type = model_mean_type + self.model_var_type = model_var_type + self.loss_type = loss_type + self.rescale_timesteps = rescale_timesteps + + # Use float64 for accuracy. 
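A standalone NumPy check of the linear schedule defined in get_named_beta_schedule above and of the cumulative products that GaussianDiffusion.__init__ derives from it; the closing comment states the closed form that q_sample further down applies.

```python
# Standalone check of the linear beta schedule and its cumulative products.
import numpy as np

T = 1000
scale = 1000 / T
betas = np.linspace(scale * 0.0001, scale * 0.02, T, dtype=np.float64)

alphas_cumprod = np.cumprod(1.0 - betas)
print(betas[0], betas[-1])       # 0.0001 0.02
print(alphas_cumprod[0])         # ~0.9999: x_1 is still almost the clean image
print(alphas_cumprod[-1])        # ~4e-5:  x_T is essentially pure noise

# q(x_t | x_0) has mean sqrt(alphas_cumprod[t]) * x_0 and variance 1 - alphas_cumprod[t],
# which is exactly the closed form q_sample uses to jump straight to any timestep.
```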
+ betas = np.array(betas, dtype=np.float64) + self.betas = betas + assert len(betas.shape) == 1, "betas must be 1-D" + assert (betas > 0).all() and (betas <= 1).all() + + self.num_timesteps = int(betas.shape[0]) + + alphas = 1.0 - betas + self.alphas_cumprod = np.cumprod(alphas, axis=0) + self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1]) + self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0) + assert self.alphas_cumprod_prev.shape == (self.num_timesteps, ) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod) + self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod) + self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod) + self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod) + self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + self.posterior_variance = (betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + # log calculation clipped because the posterior variance is 0 at the + # beginning of the diffusion chain. + self.posterior_log_variance_clipped = np.log(np.append(self.posterior_variance[1], self.posterior_variance[1:])) + self.posterior_mean_coef1 = (betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) + self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * np.sqrt(alphas) / (1.0 - self.alphas_cumprod)) + + def q_mean_variance(self, x_start, t): + """ + Get the distribution q(x_t | x_0). + + :param x_start: the [N x C x ...] tensor of noiseless inputs. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :return: A tuple (mean, variance, log_variance), all of x_start's shape. + """ + mean = (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start) + variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape) + log_variance = _extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape) + return mean, variance, log_variance + + def q_sample(self, x_start, t, noise=None): + """ + Diffuse the data for a given number of diffusion steps. + + In other words, sample from q(x_t | x_0). + + :param x_start: the initial data batch. + :param t: the number of diffusion steps (minus 1). Here, 0 means one step. + :param noise: if specified, the split-out normal noise. + :return: A noisy version of x_start. 
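+
+        Concretely, this returns
+        sqrt(alpha_bar_t) * x_start + sqrt(1 - alpha_bar_t) * noise,
+        the closed-form marginal of the forward process at step t.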
+ """ + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + assert noise.shape == x_start.shape + return (_extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise) + + def q_posterior_mean_variance(self, x_start, x_t, t): + """ + Compute the mean and variance of the diffusion posterior: + + q(x_{t-1} | x_t, x_0) + + """ + assert x_start.shape == x_t.shape + posterior_mean = (_extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start + + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t) + posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape) + assert (posterior_mean.shape[0] == posterior_variance.shape[0] == posterior_log_variance_clipped.shape[0] == + x_start.shape[0]) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None): + """ + Apply the model to get p(x_{t-1} | x_t), as well as a prediction of + the initial x, x_0. + + :param model: the model, which takes a signal and a batch of timesteps + as input. + :param x: the [N x C x ...] tensor at time t. + :param t: a 1-D Tensor of timesteps. + :param clip_denoised: if True, clip the denoised signal into [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. Applies before + clip_denoised. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict with the following keys: + - 'mean': the model mean output. + - 'variance': the model variance output. + - 'log_variance': the log of 'variance'. + - 'pred_xstart': the prediction for x_0. + """ + if model_kwargs is None: + model_kwargs = {} + + B, C = x.shape[:2] + assert t.shape == [B] + model_output = model(x, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]: + assert model_output.shape == [B, C * 2, *x.shape[2:]] + model_output, model_var_values = paddle.split(model_output, 2, axis=1) + if self.model_var_type == ModelVarType.LEARNED: + model_log_variance = model_var_values + model_variance = paddle.exp(model_log_variance) + else: + min_log = _extract_into_tensor(self.posterior_log_variance_clipped, t, x.shape) + max_log = _extract_into_tensor(np.log(self.betas), t, x.shape) + # The model_var_values is [-1, 1] for [min_var, max_var]. + frac = (model_var_values + 1) / 2 + model_log_variance = frac * max_log + (1 - frac) * min_log + model_variance = paddle.exp(model_log_variance) + else: + model_variance, model_log_variance = { + # for fixedlarge, we set the initial (log-)variance like so + # to get a better decoder log likelihood. 
+ ModelVarType.FIXED_LARGE: ( + np.append(self.posterior_variance[1], self.betas[1:]), + np.log(np.append(self.posterior_variance[1], self.betas[1:])), + ), + ModelVarType.FIXED_SMALL: ( + self.posterior_variance, + self.posterior_log_variance_clipped, + ), + }[self.model_var_type] + model_variance = _extract_into_tensor(model_variance, t, x.shape) + model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape) + + def process_xstart(x): + if denoised_fn is not None: + x = denoised_fn(x) + if clip_denoised: + return x.clamp(-1, 1) + return x + + if self.model_mean_type == ModelMeanType.PREVIOUS_X: + pred_xstart = process_xstart(self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)) + model_mean = model_output + elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]: + if self.model_mean_type == ModelMeanType.START_X: + pred_xstart = process_xstart(model_output) + else: + pred_xstart = process_xstart(self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)) + model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t) + else: + raise NotImplementedError(self.model_mean_type) + + assert (model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape) + return { + "mean": model_mean, + "variance": model_variance, + "log_variance": model_log_variance, + "pred_xstart": pred_xstart, + } + + def _predict_xstart_from_eps(self, x_t, t, eps): + assert x_t.shape == eps.shape + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps) + + def _predict_xstart_from_xprev(self, x_t, t, xprev): + assert x_t.shape == xprev.shape + return ( # (xprev - coef2*x_t) / coef1 + _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev - + _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t) + + def _predict_eps_from_xstart(self, x_t, t, pred_xstart): + return (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + pred_xstart) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + + def _scale_timesteps(self, t): + if self.rescale_timesteps: + return paddle.cast((t), 'float32') * (1000.0 / self.num_timesteps) + return t + + def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). + """ + gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_mean_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute the mean for the previous step, given a function cond_fn that + computes the gradient of a conditional log probability with respect to + x. In particular, cond_fn computes grad(log(p(y|x))), and we want to + condition on y. + + This uses the conditioning strategy from Sohl-Dickstein et al. (2015). 
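+
+        The update is new_mean = mean + variance * cond_fn(x, t, p_mean_var),
+        i.e. the posterior mean is shifted along the guidance gradient,
+        scaled by the model variance.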
+ """ + gradient = cond_fn(x, t, p_mean_var, **model_kwargs) + new_mean = (paddle.cast((p_mean_var["mean"]), 'float32') + p_mean_var["variance"] * paddle.cast( + (gradient), 'float32')) + return new_mean + + def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, self._scale_timesteps(t), **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def condition_score_with_grad(self, cond_fn, p_mean_var, x, t, model_kwargs=None): + """ + Compute what the p_mean_variance output would have been, should the + model's score function be conditioned by cond_fn. + + See condition_mean() for details on cond_fn. + + Unlike condition_mean(), this instead uses the conditioning strategy + from Song et al (2020). + """ + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + + eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"]) + eps = eps - (1 - alpha_bar).sqrt() * cond_fn(x, t, p_mean_var, **model_kwargs) + + out = p_mean_var.copy() + out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps) + out["mean"], _, _ = self.q_posterior_mean_variance(x_start=out["pred_xstart"], x_t=x, t=t) + return out + + def p_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. + :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"]} + + def p_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + ): + """ + Sample x_{t-1} from the model at the given timestep. + + :param model: the model to sample from. 
+ :param x: the current tensor at x_{t-1}. + :param t: the value of t, starting at 0 for the first diffusion step. + :param clip_denoised: if True, clip the x_start prediction to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :return: a dict containing the following keys: + - 'sample': a random sample from the model. + - 'pred_xstart': a prediction of x_0. + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + if cond_fn is not None: + out["mean"] = self.condition_mean_with_grad(cond_fn, out, x, t, model_kwargs=model_kwargs) + sample = out["mean"] + nonzero_mask * paddle.exp(0.5 * out["log_variance"]) * noise + return {"sample": sample, "pred_xstart": out["pred_xstart"].detach()} + + def p_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model. + + :param model: the model module. + :param shape: the shape of the samples, (N, C, H, W). + :param noise: if specified, the noise from the encoder to sample. + Should be of the same shape as `shape`. + :param clip_denoised: if True, clip x_start predictions to [-1, 1]. + :param denoised_fn: if not None, a function which applies to the + x_start prediction before it is used to sample. + :param cond_fn: if not None, this is a gradient function that acts + similarly to the model. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param device: if specified, the device to create the samples on. + If not specified, use a model parameter's device. + :param progress: if True, show a tqdm progress bar. + :return: a non-differentiable batch of samples. + """ + final = None + for sample in self.p_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def p_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model and yield intermediate samples from + each timestep of diffusion. + + Arguments are the same as p_sample_loop(). + Returns a generator over dicts, where each dict is the return value of + p_sample(). 
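+
+        Note that when skip_timesteps > 0, sampling starts from a noised
+        version of init_image at the first retained timestep rather than
+        from pure noise (init_image defaults to a zero image if omitted),
+        so only num_timesteps - skip_timesteps denoising steps are run.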
+ """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + sample_fn = self.p_sample_with_grad if cond_fn_with_grad else self.p_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + ) + yield out + img = out["sample"] + + def ddim_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"]} + + def ddim_sample_with_grad( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t-1} from the model using DDIM. + + Same usage as p_sample(). + """ + # with th.enable_grad(): + # x = x.detach().requires_grad_() + x = x.detach() + # x.stop_gradient = False + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + out["pred_xstart"] = out["pred_xstart"].detach() + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + sigma = (eta * paddle.sqrt( + (1 - alpha_bar_prev) / (1 - alpha_bar)) * paddle.sqrt(1 - alpha_bar / alpha_bar_prev)) + # Equation 12. + # noise = th.randn_like(x) + noise = paddle.randn(x.shape, x.dtype) + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + + paddle.sqrt(1 - alpha_bar_prev - sigma**2) * eps) + nonzero_mask = (paddle.cast((t != 0), 'float32').reshape([-1, + *([1] * (len(x.shape) - 1))])) # no noise when t == 0 + sample = mean_pred + nonzero_mask * sigma * noise + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"].detach()} + + def ddim_reverse_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + model_kwargs=None, + eta=0.0, + ): + """ + Sample x_{t+1} from the model using DDIM reverse ODE. + """ + assert eta == 0.0, "Reverse ODE only for deterministic path" + out = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. + eps = (_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x - + out["pred_xstart"]) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape) + alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape) + + # Equation 12. reversed + mean_pred = (out["pred_xstart"] * paddle.sqrt(alpha_bar_next) + paddle.sqrt(1 - alpha_bar_next) * eps) + + return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]} + + def ddim_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Generate samples from the model using DDIM. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.ddim_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + eta=eta, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + ): + final = sample + return final["sample"] + + def ddim_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + eta=0.0, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + ): + """ + Use DDIM to sample from the model and yield intermediate samples from + each timestep of DDIM. + + Same usage as p_sample_loop_progressive(). 
+ """ + # if device is None: + # device = next(model.parameters()).device + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. + from tqdm.auto import tqdm + + indices = tqdm(indices) + + for i in indices: + t = paddle.to_tensor([i] * shape[0]) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint( + low=0, + high=model.num_classes, + shape=model_kwargs['y'].shape, + ) + sample_fn = self.ddim_sample_with_grad if cond_fn_with_grad else self.ddim_sample + out = sample_fn( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + eta=eta, + ) + yield out + img = out["sample"] + + def plms_sample( + self, + model, + x, + t, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + cond_fn_with_grad=False, + order=2, + old_out=None, + ): + """ + Sample x_{t-1} from the model using Pseudo Linear Multistep. + + Same usage as p_sample(). + """ + if not int(order) or not 1 <= order <= 4: + raise ValueError('order is invalid (should be int from 1-4).') + + def get_model_output(x, t): + with paddle.set_grad_enabled(cond_fn_with_grad and cond_fn is not None): + x = x.detach().requires_grad_() if cond_fn_with_grad else x + out_orig = self.p_mean_variance( + model, + x, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + model_kwargs=model_kwargs, + ) + if cond_fn is not None: + if cond_fn_with_grad: + out = self.condition_score_with_grad(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + x = x.detach() + else: + out = self.condition_score(cond_fn, out_orig, x, t, model_kwargs=model_kwargs) + else: + out = out_orig + + # Usually our model outputs epsilon, but we re-derive it + # in case we used x_start or x_prev prediction. 
+ eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"]) + return eps, out, out_orig + + alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape) + alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape) + eps, out, out_orig = get_model_output(x, t) + + if order > 1 and old_out is None: + # Pseudo Improved Euler + old_eps = [eps] + mean_pred = out["pred_xstart"] * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps + eps_2, _, _ = get_model_output(mean_pred, t - 1) + eps_prime = (eps + eps_2) / 2 + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + else: + # Pseudo Linear Multistep (Adams-Bashforth) + old_eps = old_out["old_eps"] + old_eps.append(eps) + cur_order = min(order, len(old_eps)) + if cur_order == 1: + eps_prime = old_eps[-1] + elif cur_order == 2: + eps_prime = (3 * old_eps[-1] - old_eps[-2]) / 2 + elif cur_order == 3: + eps_prime = (23 * old_eps[-1] - 16 * old_eps[-2] + 5 * old_eps[-3]) / 12 + elif cur_order == 4: + eps_prime = (55 * old_eps[-1] - 59 * old_eps[-2] + 37 * old_eps[-3] - 9 * old_eps[-4]) / 24 + else: + raise RuntimeError('cur_order is invalid.') + pred_prime = self._predict_xstart_from_eps(x, t, eps_prime) + mean_pred = pred_prime * paddle.sqrt(alpha_bar_prev) + paddle.sqrt(1 - alpha_bar_prev) * eps_prime + + if len(old_eps) >= order: + old_eps.pop(0) + + nonzero_mask = paddle.cast((t != 0), 'float32').reshape([-1, *([1] * (len(x.shape) - 1))]) + sample = mean_pred * nonzero_mask + out["pred_xstart"] * (1 - nonzero_mask) + + return {"sample": sample, "pred_xstart": out_orig["pred_xstart"], "old_eps": old_eps} + + def plms_sample_loop( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Generate samples from the model using Pseudo Linear Multistep. + + Same usage as p_sample_loop(). + """ + final = None + for sample in self.plms_sample_loop_progressive( + model, + shape, + noise=noise, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + device=device, + progress=progress, + skip_timesteps=skip_timesteps, + init_image=init_image, + randomize_class=randomize_class, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + ): + final = sample + return final["sample"] + + def plms_sample_loop_progressive( + self, + model, + shape, + noise=None, + clip_denoised=True, + denoised_fn=None, + cond_fn=None, + model_kwargs=None, + device=None, + progress=False, + skip_timesteps=0, + init_image=None, + randomize_class=False, + cond_fn_with_grad=False, + order=2, + ): + """ + Use PLMS to sample from the model and yield intermediate samples from each + timestep of PLMS. + + Same usage as p_sample_loop_progressive(). + """ + if device is None: + device = model.parameters()[0].place + assert isinstance(shape, (tuple, list)) + if noise is not None: + img = noise + else: + img = paddle.randn(shape) + + if skip_timesteps and init_image is None: + init_image = paddle.zeros_like(img) + + indices = list(range(self.num_timesteps - skip_timesteps))[::-1] + + if init_image is not None: + my_t = paddle.ones([shape[0]], dtype='int64') * indices[0] + img = self.q_sample(init_image, my_t, img) + + if progress: + # Lazy import so that we don't depend on tqdm. 
+ from tqdm.auto import tqdm + + indices = tqdm(indices) + + old_out = None + + for i in indices: + t = paddle.to_tensor([i] * shape[0], place=device) + if randomize_class and 'y' in model_kwargs: + model_kwargs['y'] = paddle.randint(low=0, high=model.num_classes, shape=model_kwargs['y'].shape) + # with paddle.no_grad(): + out = self.plms_sample( + model, + img, + t, + clip_denoised=clip_denoised, + denoised_fn=denoised_fn, + cond_fn=cond_fn, + model_kwargs=model_kwargs, + cond_fn_with_grad=cond_fn_with_grad, + order=order, + old_out=old_out, + ) + yield out + old_out = out + img = out["sample"] + + def _vb_terms_bpd(self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None): + """ + Get a term for the variational lower-bound. + + The resulting units are bits (rather than nats, as one might expect). + This allows for comparison to other papers. + + :return: a dict with the following keys: + - 'output': a shape [N] tensor of NLLs or KLs. + - 'pred_xstart': the x_0 predictions. + """ + true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t) + out = self.p_mean_variance(model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs) + kl = normal_kl(true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]) + kl = mean_flat(kl) / np.log(2.0) + + decoder_nll = -discretized_gaussian_log_likelihood( + x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]) + assert decoder_nll.shape == x_start.shape + decoder_nll = mean_flat(decoder_nll) / np.log(2.0) + + # At the first timestep return the decoder NLL, + # otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t)) + output = paddle.where((t == 0), decoder_nll, kl) + return {"output": output, "pred_xstart": out["pred_xstart"]} + + def training_losses(self, model, x_start, t, model_kwargs=None, noise=None): + """ + Compute training losses for a single timestep. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param t: a batch of timestep indices. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + :param noise: if specified, the specific Gaussian noise to try to remove. + :return: a dict with the key "loss" containing a tensor of shape [N]. + Some mean or variance settings may also have other keys. + """ + if model_kwargs is None: + model_kwargs = {} + if noise is None: + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start, t, noise=noise) + + terms = {} + + if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL: + terms["loss"] = self._vb_terms_bpd( + model=model, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + model_kwargs=model_kwargs, + )["output"] + if self.loss_type == LossType.RESCALED_KL: + terms["loss"] *= self.num_timesteps + elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: + model_output = model(x_t, self._scale_timesteps(t), **model_kwargs) + + if self.model_var_type in [ + ModelVarType.LEARNED, + ModelVarType.LEARNED_RANGE, + ]: + B, C = x_t.shape[:2] + assert model_output.shape == (B, C * 2, *x_t.shape[2:]) + model_output, model_var_values = paddle.split(model_output, 2, dim=1) + # Learn the variance using the variational bound, but don't let + # it affect our mean prediction. 
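+                # frozen_out carries the detached mean next to the live
+                # variance values, so the VB term below only backpropagates
+                # into the predicted variance, never into the mean.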
+ frozen_out = paddle.concat([model_output.detach(), model_var_values], axis=1) + terms["vb"] = self._vb_terms_bpd( + model=lambda *args, r=frozen_out: r, + x_start=x_start, + x_t=x_t, + t=t, + clip_denoised=False, + )["output"] + if self.loss_type == LossType.RESCALED_MSE: + # Divide by 1000 for equivalence with initial implementation. + # Without a factor of 1/1000, the VB term hurts the MSE term. + terms["vb"] *= self.num_timesteps / 1000.0 + + target = { + ModelMeanType.PREVIOUS_X: self.q_posterior_mean_variance(x_start=x_start, x_t=x_t, t=t)[0], + ModelMeanType.START_X: x_start, + ModelMeanType.EPSILON: noise, + }[self.model_mean_type] + assert model_output.shape == target.shape == x_start.shape + terms["mse"] = mean_flat((target - model_output)**2) + if "vb" in terms: + terms["loss"] = terms["mse"] + terms["vb"] + else: + terms["loss"] = terms["mse"] + else: + raise NotImplementedError(self.loss_type) + + return terms + + def _prior_bpd(self, x_start): + """ + Get the prior KL term for the variational lower-bound, measured in + bits-per-dim. + + This term can't be optimized, as it only depends on the encoder. + + :param x_start: the [N x C x ...] tensor of inputs. + :return: a batch of [N] KL values (in bits), one per batch element. + """ + batch_size = x_start.shape[0] + t = paddle.to_tensor([self.num_timesteps - 1] * batch_size, place=x_start.place) + qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t) + kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0) + return mean_flat(kl_prior) / np.log(2.0) + + def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None): + """ + Compute the entire variational lower-bound, measured in bits-per-dim, + as well as other related quantities. + + :param model: the model to evaluate loss on. + :param x_start: the [N x C x ...] tensor of inputs. + :param clip_denoised: if True, clip denoised samples. + :param model_kwargs: if not None, a dict of extra keyword arguments to + pass to the model. This can be used for conditioning. + + :return: a dict containing the following keys: + - total_bpd: the total variational lower-bound, per batch element. + - prior_bpd: the prior term in the lower-bound. + - vb: an [N x T] tensor of terms in the lower-bound. + - xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep. + - mse: an [N x T] tensor of epsilon MSEs for each timestep. 
+ """ + device = x_start.place + batch_size = x_start.shape[0] + + vb = [] + xstart_mse = [] + mse = [] + for t in list(range(self.num_timesteps))[::-1]: + t_batch = paddle.to_tensor([t] * batch_size, place=device) + # noise = th.randn_like(x_start) + noise = paddle.randn(x_start.shape, x_start.dtype) + x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise) + # Calculate VLB term at the current timestep + # with paddle.no_grad(): + out = self._vb_terms_bpd( + model, + x_start=x_start, + x_t=x_t, + t=t_batch, + clip_denoised=clip_denoised, + model_kwargs=model_kwargs, + ) + vb.append(out["output"]) + xstart_mse.append(mean_flat((out["pred_xstart"] - x_start)**2)) + eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"]) + mse.append(mean_flat((eps - noise)**2)) + + vb = paddle.stack(vb, axis=1) + xstart_mse = paddle.stack(xstart_mse, axis=1) + mse = paddle.stack(mse, axis=1) + + prior_bpd = self._prior_bpd(x_start) + total_bpd = vb.sum(axis=1) + prior_bpd + return { + "total_bpd": total_bpd, + "prior_bpd": prior_bpd, + "vb": vb, + "xstart_mse": xstart_mse, + "mse": mse, + } + + +def _extract_into_tensor(arr, timesteps, broadcast_shape): + """ + Extract values from a 1-D numpy array for a batch of indices. + + :param arr: the 1-D numpy array. + :param timesteps: a tensor of indices into the array to extract. + :param broadcast_shape: a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. + """ + res = paddle.to_tensor(arr, place=timesteps.place)[timesteps] + while len(res.shape) < len(broadcast_shape): + res = res[..., None] + return res.expand(broadcast_shape) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/losses.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/losses.py new file mode 100755 index 000000000..5c3970de5 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/losses.py @@ -0,0 +1,86 @@ +""" +Helpers for various likelihood-based losses implemented by Paddle. These are ported from the original +Ho et al. diffusion models codebase: +https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py +""" +import numpy as np +import paddle +import paddle.nn.functional as F + + +def normal_kl(mean1, logvar1, mean2, logvar2): + """ + Compute the KL divergence between two gaussians. + + Shapes are automatically broadcasted, so batches can be compared to + scalars, among other use cases. + """ + tensor = None + for obj in (mean1, logvar1, mean2, logvar2): + if isinstance(obj, paddle.Tensor): + tensor = obj + break + assert tensor is not None, "at least one argument must be a Tensor" + + # Force variances to be Tensors. Broadcasting helps convert scalars to + # Tensors, but it does not work for th.exp(). + logvar1, logvar2 = [x if isinstance(x, paddle.Tensor) else paddle.to_tensor(x) for x in (logvar1, logvar2)] + + return 0.5 * (-1.0 + logvar2 - logvar1 + paddle.exp(logvar1 - logvar2) + + ((mean1 - mean2)**2) * paddle.exp(-logvar2)) + + +def approx_standard_normal_cdf(x): + """ + A fast approximation of the cumulative distribution function of the + standard normal. 
+ """ + return 0.5 * (1.0 + paddle.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * paddle.pow(x, 3)))) + + +def discretized_gaussian_log_likelihood(x, *, means, log_scales): + """ + Compute the log-likelihood of a Gaussian distribution discretizing to a + given image. + + :param x: the target images. It is assumed that this was uint8 values, + rescaled to the range [-1, 1]. + :param means: the Gaussian mean Tensor. + :param log_scales: the Gaussian log stddev Tensor. + :return: a tensor like x of log probabilities (in nats). + """ + assert x.shape == means.shape == log_scales.shape + centered_x = x - means + inv_stdv = paddle.exp(-log_scales) + plus_in = inv_stdv * (centered_x + 1.0 / 255.0) + cdf_plus = approx_standard_normal_cdf(plus_in) + min_in = inv_stdv * (centered_x - 1.0 / 255.0) + cdf_min = approx_standard_normal_cdf(min_in) + log_cdf_plus = paddle.log(cdf_plus.clip(min=1e-12)) + log_one_minus_cdf_min = paddle.log((1.0 - cdf_min).clip(min=1e-12)) + cdf_delta = cdf_plus - cdf_min + log_probs = paddle.where( + x < -0.999, + log_cdf_plus, + paddle.where(x > 0.999, log_one_minus_cdf_min, paddle.log(cdf_delta.clip(min=1e-12))), + ) + assert log_probs.shape == x.shape + return log_probs + + +def spherical_dist_loss(x, y): + x = F.normalize(x, axis=-1) + y = F.normalize(y, axis=-1) + return (x - y).norm(axis=-1).divide(paddle.to_tensor(2.0)).asin().pow(2).multiply(paddle.to_tensor(2.0)) + + +def tv_loss(input): + """L2 total variation loss, as in Mahendran et al.""" + input = F.pad(input, (0, 1, 0, 1), 'replicate') + x_diff = input[..., :-1, 1:] - input[..., :-1, :-1] + y_diff = input[..., 1:, :-1] - input[..., :-1, :-1] + return (x_diff**2 + y_diff**2).mean([1, 2, 3]) + + +def range_loss(input): + return (input - input.clip(-1, 1)).pow(2).mean([1, 2, 3]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/make_cutouts.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/make_cutouts.py new file mode 100755 index 000000000..f92953c3f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/make_cutouts.py @@ -0,0 +1,177 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/nn/make_cutouts.py +''' +import math + +import paddle +import paddle.nn as nn +from disco_diffusion_clip_rn101.resize_right.resize_right import resize +from paddle.nn import functional as F + +from . 
import transforms as T + +skip_augs = False # @param{type: 'boolean'} + + +def sinc(x): + return paddle.where(x != 0, paddle.sin(math.pi * x) / (math.pi * x), x.new_ones([])) + + +def lanczos(x, a): + cond = paddle.logical_and(-a < x, x < a) + out = paddle.where(cond, sinc(x) * sinc(x / a), x.new_zeros([])) + return out / out.sum() + + +def ramp(ratio, width): + n = math.ceil(width / ratio + 1) + out = paddle.empty([n]) + cur = 0 + for i in range(out.shape[0]): + out[i] = cur + cur += ratio + return paddle.concat([-out[1:].flip([0]), out])[1:-1] + + +class MakeCutouts(nn.Layer): + + def __init__(self, cut_size, cutn, skip_augs=False): + super().__init__() + self.cut_size = cut_size + self.cutn = cutn + self.skip_augs = skip_augs + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine(degrees=15, translate=(0.1, 0.1)), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomPerspective(distortion_scale=0.4, p=0.7), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.15), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + input = T.Pad(input.shape[2] // 4, fill=0)(input) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + + cutouts = [] + for ch in range(self.cutn): + if ch > self.cutn - self.cutn // 4: + cutout = input.clone() + else: + size = int(max_size * + paddle.zeros(1, ).normal_(mean=0.8, std=0.3).clip(float(self.cut_size / max_size), 1.0)) + offsetx = paddle.randint(0, abs(sideX - size + 1), ()) + offsety = paddle.randint(0, abs(sideY - size + 1), ()) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + + if not self.skip_augs: + cutout = self.augs(cutout) + cutouts.append(resample(cutout, (self.cut_size, self.cut_size))) + del cutout + + cutouts = paddle.concat(cutouts, axis=0) + return cutouts + + +class MakeCutoutsDango(nn.Layer): + + def __init__(self, cut_size, Overview=4, InnerCrop=0, IC_Size_Pow=0.5, IC_Grey_P=0.2): + super().__init__() + self.cut_size = cut_size + self.Overview = Overview + self.InnerCrop = InnerCrop + self.IC_Size_Pow = IC_Size_Pow + self.IC_Grey_P = IC_Grey_P + self.augs = nn.Sequential(*[ + T.RandomHorizontalFlip(prob=0.5), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomAffine( + degrees=10, + translate=(0.05, 0.05), + interpolation=T.InterpolationMode.BILINEAR, + ), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.RandomGrayscale(p=0.1), + T.Lambda(lambda x: x + paddle.randn(x.shape) * 0.01), + T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1), + ]) + + def forward(self, input): + cutouts = [] + gray = T.Grayscale(3) + sideY, sideX = input.shape[2:4] + max_size = min(sideX, sideY) + min_size = min(sideX, sideY, self.cut_size) + output_shape = [1, 3, self.cut_size, self.cut_size] + pad_input = F.pad( + input, + ( + (sideY - max_size) // 2, + (sideY - max_size) // 2, + (sideX - max_size) // 2, + (sideX - max_size) // 2, + ), + **padargs, + ) + cutout = resize(pad_input, out_shape=output_shape) + + if self.Overview > 0: + if self.Overview <= 4: + if self.Overview >= 1: + cutouts.append(cutout) + if self.Overview >= 2: + cutouts.append(gray(cutout)) + if self.Overview >= 3: + cutouts.append(cutout[:, :, :, ::-1]) + if self.Overview == 4: + cutouts.append(gray(cutout[:, :, :, ::-1])) + else: + cutout = resize(pad_input, out_shape=output_shape) + 
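+                # Overview > 4: just repeat the plain resized cutout
+                # Overview times (no grayscale / flipped variants).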
for _ in range(self.Overview): + cutouts.append(cutout) + + if self.InnerCrop > 0: + for i in range(self.InnerCrop): + size = int(paddle.rand([1])**self.IC_Size_Pow * (max_size - min_size) + min_size) + offsetx = paddle.randint(0, sideX - size + 1) + offsety = paddle.randint(0, sideY - size + 1) + cutout = input[:, :, offsety:offsety + size, offsetx:offsetx + size] + if i <= int(self.IC_Grey_P * self.InnerCrop): + cutout = gray(cutout) + cutout = resize(cutout, out_shape=output_shape) + cutouts.append(cutout) + + cutouts = paddle.concat(cutouts) + if skip_augs is not True: + cutouts = self.augs(cutouts) + return cutouts + + +def resample(input, size, align_corners=True): + n, c, h, w = input.shape + dh, dw = size + + input = input.reshape([n * c, 1, h, w]) + + if dh < h: + kernel_h = lanczos(ramp(dh / h, 2), 2).to(input.device, input.dtype) + pad_h = (kernel_h.shape[0] - 1) // 2 + input = F.pad(input, (0, 0, pad_h, pad_h), 'reflect') + input = F.conv2d(input, kernel_h[None, None, :, None]) + + if dw < w: + kernel_w = lanczos(ramp(dw / w, 2), 2).to(input.device, input.dtype) + pad_w = (kernel_w.shape[0] - 1) // 2 + input = F.pad(input, (pad_w, pad_w, 0, 0), 'reflect') + input = F.conv2d(input, kernel_w[None, None, None, :]) + + input = input.reshape([n, c, h, w]) + return F.interpolate(input, size, mode='bicubic', align_corners=align_corners) + + +padargs = {} diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/nn.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/nn.py new file mode 100755 index 000000000..d618183e2 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/nn.py @@ -0,0 +1,127 @@ +""" +Various utilities for neural networks implemented by Paddle. This code is rewritten based on: +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/nn.py +""" +import math + +import paddle +import paddle.nn as nn + + +class SiLU(nn.Layer): + + def forward(self, x): + return x * nn.functional.sigmoid(x) + + +class GroupNorm32(nn.GroupNorm): + + def forward(self, x): + return super().forward(x) + + +def conv_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D convolution module. + """ + if dims == 1: + return nn.Conv1D(*args, **kwargs) + elif dims == 2: + return nn.Conv2D(*args, **kwargs) + elif dims == 3: + return nn.Conv3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def linear(*args, **kwargs): + """ + Create a linear module. + """ + return nn.Linear(*args, **kwargs) + + +def avg_pool_nd(dims, *args, **kwargs): + """ + Create a 1D, 2D, or 3D average pooling module. + """ + if dims == 1: + return nn.AvgPool1D(*args, **kwargs) + elif dims == 2: + return nn.AvgPool2D(*args, **kwargs) + elif dims == 3: + return nn.AvgPool3D(*args, **kwargs) + raise ValueError(f"unsupported dimensions: {dims}") + + +def update_ema(target_params, source_params, rate=0.99): + """ + Update target parameters to be closer to those of source parameters using + an exponential moving average. + + :param target_params: the target parameter sequence. + :param source_params: the source parameter sequence. + :param rate: the EMA rate (closer to 1 means slower). + """ + for targ, src in zip(target_params, source_params): + targ.detach().mul_(rate).add_(src, alpha=1 - rate) + + +def zero_module(module): + """ + Zero out the parameters of a module and return it. 
+ """ + for p in module.parameters(): + p.detach().zero_() + return module + + +def scale_module(module, scale): + """ + Scale the parameters of a module and return it. + """ + for p in module.parameters(): + p.detach().mul_(scale) + return module + + +def mean_flat(tensor): + """ + Take the mean over all non-batch dimensions. + """ + return tensor.mean(axis=list(range(1, len(tensor.shape)))) + + +def normalization(channels): + """ + Make a standard normalization layer. + + :param channels: number of input channels. + :return: an nn.Module for normalization. + """ + return GroupNorm32(32, channels) + + +def timestep_embedding(timesteps, dim, max_period=10000): + """ + Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param dim: the dimension of the output. + :param max_period: controls the minimum frequency of the embeddings. + :return: an [N x dim] Tensor of positional embeddings. + """ + half = dim // 2 + freqs = paddle.exp(-math.log(max_period) * paddle.arange(start=0, end=half, dtype=paddle.float32) / half) + args = paddle.cast(timesteps[:, None], 'float32') * freqs[None] + embedding = paddle.concat([paddle.cos(args), paddle.sin(args)], axis=-1) + if dim % 2: + embedding = paddle.concat([embedding, paddle.zeros_like(embedding[:, :1])], axis=-1) + return embedding + + +def checkpoint(func, inputs, params, flag): + """ + This function is disabled. And now just forward. + """ + return func(*inputs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/perlin_noises.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/perlin_noises.py new file mode 100755 index 000000000..6dacb331b --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/perlin_noises.py @@ -0,0 +1,78 @@ +''' +Perlin noise implementation by Paddle. 
+This code is rewritten based on: +https://github.com/jina-ai/discoart/blob/main/discoart/nn/perlin_noises.py +''' +import numpy as np +import paddle +import paddle.vision.transforms as TF +from PIL import Image +from PIL import ImageOps + + +def interp(t): + return 3 * t**2 - 2 * t**3 + + +def perlin(width, height, scale=10): + gx, gy = paddle.randn([2, width + 1, height + 1, 1, 1]) + xs = paddle.linspace(0, 1, scale + 1)[:-1, None] + ys = paddle.linspace(0, 1, scale + 1)[None, :-1] + wx = 1 - interp(xs) + wy = 1 - interp(ys) + dots = 0 + dots += wx * wy * (gx[:-1, :-1] * xs + gy[:-1, :-1] * ys) + dots += (1 - wx) * wy * (-gx[1:, :-1] * (1 - xs) + gy[1:, :-1] * ys) + dots += wx * (1 - wy) * (gx[:-1, 1:] * xs - gy[:-1, 1:] * (1 - ys)) + dots += (1 - wx) * (1 - wy) * (-gx[1:, 1:] * (1 - xs) - gy[1:, 1:] * (1 - ys)) + return dots.transpose([0, 2, 1, 3]).reshape([width * scale, height * scale]) + + +def perlin_ms(octaves, width, height, grayscale): + out_array = [0.5] if grayscale else [0.5, 0.5, 0.5] + # out_array = [0.0] if grayscale else [0.0, 0.0, 0.0] + for i in range(1 if grayscale else 3): + scale = 2**len(octaves) + oct_width = width + oct_height = height + for oct in octaves: + p = perlin(oct_width, oct_height, scale) + out_array[i] += p * oct + scale //= 2 + oct_width *= 2 + oct_height *= 2 + return paddle.concat(out_array) + + +def create_perlin_noise(octaves, width, height, grayscale, side_y, side_x): + out = perlin_ms(octaves, width, height, grayscale) + if grayscale: + out = TF.resize(size=(side_y, side_x), img=out.numpy()) + out = np.uint8(out) + out = Image.fromarray(out).convert('RGB') + else: + out = out.reshape([-1, 3, out.shape[0] // 3, out.shape[1]]) + out = out.squeeze().transpose([1, 2, 0]).numpy() + out = TF.resize(size=(side_y, side_x), img=out) + out = out.clip(0, 1) * 255 + out = np.uint8(out) + out = Image.fromarray(out) + + out = ImageOps.autocontrast(out) + return out + + +def regen_perlin(perlin_mode, side_y, side_x, batch_size): + if perlin_mode == 'color': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + + init = (TF.to_tensor(init).add(TF.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + return init.expand([batch_size, -1, -1, -1]) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/respace.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/respace.py new file mode 100755 index 000000000..c001c70d0 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/respace.py @@ -0,0 +1,123 @@ +''' +This code is rewritten by Paddle based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/respace.py +''' +import numpy as np +import paddle + +from .gaussian_diffusion import GaussianDiffusion + + +def space_timesteps(num_timesteps, section_counts): + """ + Create a list of timesteps to use from an original diffusion process, + given the number of timesteps we 
want to take from equally-sized portions + of the original process. + + For example, if there's 300 timesteps and the section counts are [10,15,20] + then the first 100 timesteps are strided to be 10 timesteps, the second 100 + are strided to be 15 timesteps, and the final 100 are strided to be 20. + + If the stride is a string starting with "ddim", then the fixed striding + from the DDIM paper is used, and only one section is allowed. + + :param num_timesteps: the number of diffusion steps in the original + process to divide up. + :param section_counts: either a list of numbers, or a string containing + comma-separated numbers, indicating the step count + per section. As a special case, use "ddimN" where N + is a number of steps to use the striding from the + DDIM paper. + :return: a set of diffusion steps from the original process to use. + """ + if isinstance(section_counts, str): + if section_counts.startswith("ddim"): + desired_count = int(section_counts[len("ddim"):]) + for i in range(1, num_timesteps): + if len(range(0, num_timesteps, i)) == desired_count: + return set(range(0, num_timesteps, i)) + raise ValueError(f"cannot create exactly {num_timesteps} steps with an integer stride") + section_counts = [int(x) for x in section_counts.split(",")] + size_per = num_timesteps // len(section_counts) + extra = num_timesteps % len(section_counts) + start_idx = 0 + all_steps = [] + for i, section_count in enumerate(section_counts): + size = size_per + (1 if i < extra else 0) + if size < section_count: + raise ValueError(f"cannot divide section of {size} steps into {section_count}") + if section_count <= 1: + frac_stride = 1 + else: + frac_stride = (size - 1) / (section_count - 1) + cur_idx = 0.0 + taken_steps = [] + for _ in range(section_count): + taken_steps.append(start_idx + round(cur_idx)) + cur_idx += frac_stride + all_steps += taken_steps + start_idx += size + return set(all_steps) + + +class SpacedDiffusion(GaussianDiffusion): + """ + A diffusion process which can skip steps in a base diffusion process. + + :param use_timesteps: a collection (sequence or set) of timesteps from the + original diffusion process to retain. + :param kwargs: the kwargs to create the base diffusion process. 
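+
+    For example, use_timesteps=space_timesteps(1000, "ddim50") keeps 50 of
+    the original 1000 steps; __init__ then recomputes the betas from the
+    retained alphas_cumprod so each kept step spans the skipped ones.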
+ """ + + def __init__(self, use_timesteps, **kwargs): + self.use_timesteps = set(use_timesteps) + self.timestep_map = [] + self.original_num_steps = len(kwargs["betas"]) + + base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa + last_alpha_cumprod = 1.0 + new_betas = [] + for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): + if i in self.use_timesteps: + new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) + last_alpha_cumprod = alpha_cumprod + self.timestep_map.append(i) + kwargs["betas"] = np.array(new_betas) + super().__init__(**kwargs) + + def p_mean_variance(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) + + def training_losses(self, model, *args, **kwargs): # pylint: disable=signature-differs + return super().training_losses(self._wrap_model(model), *args, **kwargs) + + def condition_mean(self, cond_fn, *args, **kwargs): + return super().condition_mean(self._wrap_model(cond_fn), *args, **kwargs) + + def condition_score(self, cond_fn, *args, **kwargs): + return super().condition_score(self._wrap_model(cond_fn), *args, **kwargs) + + def _wrap_model(self, model): + if isinstance(model, _WrappedModel): + return model + return _WrappedModel(model, self.timestep_map, self.rescale_timesteps, self.original_num_steps) + + def _scale_timesteps(self, t): + # Scaling is done by the wrapped model. + return t + + +class _WrappedModel: + + def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): + self.model = model + self.timestep_map = timestep_map + self.rescale_timesteps = rescale_timesteps + self.original_num_steps = original_num_steps + + def __call__(self, x, ts, **kwargs): + map_tensor = paddle.to_tensor(self.timestep_map, place=ts.place, dtype=ts.dtype) + new_ts = map_tensor[ts] + if self.rescale_timesteps: + new_ts = paddle.cast(new_ts, 'float32') * (1000.0 / self.original_num_steps) + return self.model(x, new_ts, **kwargs) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/script_util.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/script_util.py new file mode 100755 index 000000000..d728a5430 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/script_util.py @@ -0,0 +1,201 @@ +''' +This code is based on +https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/script_util.py +''' +import argparse +import inspect + +from . import gaussian_diffusion as gd +from .respace import space_timesteps +from .respace import SpacedDiffusion +from .unet import EncoderUNetModel +from .unet import SuperResModel +from .unet import UNetModel + +NUM_CLASSES = 1000 + + +def diffusion_defaults(): + """ + Defaults for image and classifier training. + """ + return dict( + learn_sigma=False, + diffusion_steps=1000, + noise_schedule="linear", + timestep_respacing="", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + ) + + +def model_and_diffusion_defaults(): + """ + Defaults for image training. 
+ """ + res = dict( + image_size=64, + num_channels=128, + num_res_blocks=2, + num_heads=4, + num_heads_upsample=-1, + num_head_channels=-1, + attention_resolutions="16,8", + channel_mult="", + dropout=0.0, + class_cond=False, + use_checkpoint=False, + use_scale_shift_norm=True, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, + ) + res.update(diffusion_defaults()) + return res + + +def create_model_and_diffusion( + image_size, + class_cond, + learn_sigma, + num_channels, + num_res_blocks, + channel_mult, + num_heads, + num_head_channels, + num_heads_upsample, + attention_resolutions, + dropout, + diffusion_steps, + noise_schedule, + timestep_respacing, + use_kl, + predict_xstart, + rescale_timesteps, + rescale_learned_sigmas, + use_checkpoint, + use_scale_shift_norm, + resblock_updown, + use_fp16, + use_new_attention_order, +): + model = create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult=channel_mult, + learn_sigma=learn_sigma, + class_cond=class_cond, + use_checkpoint=use_checkpoint, + attention_resolutions=attention_resolutions, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + resblock_updown=resblock_updown, + use_fp16=use_fp16, + use_new_attention_order=use_new_attention_order, + ) + diffusion = create_gaussian_diffusion( + steps=diffusion_steps, + learn_sigma=learn_sigma, + noise_schedule=noise_schedule, + use_kl=use_kl, + predict_xstart=predict_xstart, + rescale_timesteps=rescale_timesteps, + rescale_learned_sigmas=rescale_learned_sigmas, + timestep_respacing=timestep_respacing, + ) + return model, diffusion + + +def create_model( + image_size, + num_channels, + num_res_blocks, + channel_mult="", + learn_sigma=False, + class_cond=False, + use_checkpoint=False, + attention_resolutions="16", + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + dropout=0, + resblock_updown=False, + use_fp16=False, + use_new_attention_order=False, +): + if channel_mult == "": + if image_size == 512: + channel_mult = (0.5, 1, 1, 2, 2, 4, 4) + elif image_size == 256: + channel_mult = (1, 1, 2, 2, 4, 4) + elif image_size == 128: + channel_mult = (1, 1, 2, 3, 4) + elif image_size == 64: + channel_mult = (1, 2, 3, 4) + else: + raise ValueError(f"unsupported image size: {image_size}") + else: + channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) + + attention_ds = [] + for res in attention_resolutions.split(","): + attention_ds.append(image_size // int(res)) + + return UNetModel( + image_size=image_size, + in_channels=3, + model_channels=num_channels, + out_channels=(3 if not learn_sigma else 6), + num_res_blocks=num_res_blocks, + attention_resolutions=tuple(attention_ds), + dropout=dropout, + channel_mult=channel_mult, + num_classes=(NUM_CLASSES if class_cond else None), + use_checkpoint=use_checkpoint, + use_fp16=use_fp16, + num_heads=num_heads, + num_head_channels=num_head_channels, + num_heads_upsample=num_heads_upsample, + use_scale_shift_norm=use_scale_shift_norm, + resblock_updown=resblock_updown, + use_new_attention_order=use_new_attention_order, + ) + + +def create_gaussian_diffusion( + *, + steps=1000, + learn_sigma=False, + sigma_small=False, + noise_schedule="linear", + use_kl=False, + predict_xstart=False, + rescale_timesteps=False, + rescale_learned_sigmas=False, + timestep_respacing="", +): + betas = gd.get_named_beta_schedule(noise_schedule, steps) + if 
use_kl: + loss_type = gd.LossType.RESCALED_KL + elif rescale_learned_sigmas: + loss_type = gd.LossType.RESCALED_MSE + else: + loss_type = gd.LossType.MSE + if not timestep_respacing: + timestep_respacing = [steps] + return SpacedDiffusion( + use_timesteps=space_timesteps(steps, timestep_respacing), + betas=betas, + model_mean_type=(gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X), + model_var_type=((gd.ModelVarType.FIXED_LARGE if not sigma_small else gd.ModelVarType.FIXED_SMALL) + if not learn_sigma else gd.ModelVarType.LEARNED_RANGE), + loss_type=loss_type, + rescale_timesteps=rescale_timesteps, + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/sec_diff.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/sec_diff.py new file mode 100755 index 000000000..1e361f18f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/sec_diff.py @@ -0,0 +1,135 @@ +''' +This code is rewritten by Paddle based on +https://github.com/jina-ai/discoart/blob/main/discoart/nn/sec_diff.py +''' +import math +from dataclasses import dataclass +from functools import partial + +import paddle +import paddle.nn as nn + + +@dataclass +class DiffusionOutput: + v: paddle.Tensor + pred: paddle.Tensor + eps: paddle.Tensor + + +class SkipBlock(nn.Layer): + + def __init__(self, main, skip=None): + super().__init__() + self.main = nn.Sequential(*main) + self.skip = skip if skip else nn.Identity() + + def forward(self, input): + return paddle.concat([self.main(input), self.skip(input)], axis=1) + + +def append_dims(x, n): + return x[(Ellipsis, *(None, ) * (n - x.ndim))] + + +def expand_to_planes(x, shape): + return paddle.tile(append_dims(x, len(shape)), [1, 1, *shape[2:]]) + + +def alpha_sigma_to_t(alpha, sigma): + return paddle.atan2(sigma, alpha) * 2 / math.pi + + +def t_to_alpha_sigma(t): + return paddle.cos(t * math.pi / 2), paddle.sin(t * math.pi / 2) + + +class SecondaryDiffusionImageNet2(nn.Layer): + + def __init__(self): + super().__init__() + c = 64 # The base channel count + cs = [c, c * 2, c * 2, c * 4, c * 4, c * 8] + + self.timestep_embed = FourierFeatures(1, 16) + self.down = nn.AvgPool2D(2) + self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False) + + self.net = nn.Sequential( + ConvBlock(3 + 16, cs[0]), + ConvBlock(cs[0], cs[0]), + SkipBlock([ + self.down, + ConvBlock(cs[0], cs[1]), + ConvBlock(cs[1], cs[1]), + SkipBlock([ + self.down, + ConvBlock(cs[1], cs[2]), + ConvBlock(cs[2], cs[2]), + SkipBlock([ + self.down, + ConvBlock(cs[2], cs[3]), + ConvBlock(cs[3], cs[3]), + SkipBlock([ + self.down, + ConvBlock(cs[3], cs[4]), + ConvBlock(cs[4], cs[4]), + SkipBlock([ + self.down, + ConvBlock(cs[4], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[5]), + ConvBlock(cs[5], cs[4]), + self.up, + ]), + ConvBlock(cs[4] * 2, cs[4]), + ConvBlock(cs[4], cs[3]), + self.up, + ]), + ConvBlock(cs[3] * 2, cs[3]), + ConvBlock(cs[3], cs[2]), + self.up, + ]), + ConvBlock(cs[2] * 2, cs[2]), + ConvBlock(cs[2], cs[1]), + self.up, + ]), + ConvBlock(cs[1] * 2, cs[1]), + ConvBlock(cs[1], cs[0]), + self.up, + ]), + ConvBlock(cs[0] * 2, cs[0]), + nn.Conv2D(cs[0], 3, 3, padding=1), + ) + + def forward(self, input, t): + timestep_embed = expand_to_planes(self.timestep_embed(t[:, None]), input.shape) + v = self.net(paddle.concat([input, timestep_embed], axis=1)) + alphas, sigmas = map(partial(append_dims, n=v.ndim), t_to_alpha_sigma(t)) + pred = input * alphas - v * 
sigmas + eps = input * sigmas + v * alphas + return DiffusionOutput(v, pred, eps) + + +class FourierFeatures(nn.Layer): + + def __init__(self, in_features, out_features, std=1.0): + super().__init__() + assert out_features % 2 == 0 + # self.weight = nn.Parameter(paddle.randn([out_features // 2, in_features]) * std) + self.weight = paddle.create_parameter([out_features // 2, in_features], + dtype='float32', + default_initializer=nn.initializer.Normal(mean=0.0, std=std)) + + def forward(self, input): + f = 2 * math.pi * input @ self.weight.T + return paddle.concat([f.cos(), f.sin()], axis=-1) + + +class ConvBlock(nn.Sequential): + + def __init__(self, c_in, c_out): + super().__init__( + nn.Conv2D(c_in, c_out, 3, padding=1), + nn.ReLU(), + ) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/transforms.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/transforms.py new file mode 100755 index 000000000..e0b620b01 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/transforms.py @@ -0,0 +1,757 @@ +''' +This code is rewritten by Paddle based on +https://github.com/pytorch/vision/blob/main/torchvision/transforms/transforms.py +''' +import math +import numbers +import warnings +from enum import Enum +from typing import Any +from typing import Dict +from typing import List +from typing import Optional +from typing import Sequence +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn.functional import grid_sample +from paddle.vision import transforms as T + + +class Normalize(nn.Layer): + + def __init__(self, mean, std): + super(Normalize, self).__init__() + self.mean = paddle.to_tensor(mean) + self.std = paddle.to_tensor(std) + + def forward(self, tensor: Tensor): + dtype = tensor.dtype + mean = paddle.to_tensor(self.mean, dtype=dtype) + std = paddle.to_tensor(self.std, dtype=dtype) + mean = mean.reshape([1, -1, 1, 1]) + std = std.reshape([1, -1, 1, 1]) + result = tensor.subtract(mean).divide(std) + return result + + +class InterpolationMode(Enum): + """Interpolation modes + Available interpolation methods are ``nearest``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, and ``lanczos``. 
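+
+    Note that the tensor-based transforms in this file only use ``nearest`` and
+    ``bilinear`` (the modes accepted by grid_sample); the remaining members
+    exist for PIL compatibility. Illustrative use (a sketch):
+
+        transform = RandomAffine(degrees=15, interpolation=InterpolationMode.BILINEAR)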
+ """ + + NEAREST = "nearest" + BILINEAR = "bilinear" + BICUBIC = "bicubic" + # For PIL compatibility + BOX = "box" + HAMMING = "hamming" + LANCZOS = "lanczos" + + +class Grayscale(nn.Layer): + + def __init__(self, num_output_channels): + super(Grayscale, self).__init__() + self.num_output_channels = num_output_channels + + def forward(self, x): + output = (0.2989 * x[:, 0:1, :, :] + 0.587 * x[:, 1:2, :, :] + 0.114 * x[:, 2:3, :, :]) + if self.num_output_channels == 3: + return output.expand(x.shape) + + return output + + +class Lambda(nn.Layer): + + def __init__(self, func): + super(Lambda, self).__init__() + self.transform = func + + def forward(self, x): + return self.transform(x) + + +class RandomGrayscale(nn.Layer): + + def __init__(self, p): + super(RandomGrayscale, self).__init__() + self.prob = p + self.transform = Grayscale(3) + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return self.transform(x) + else: + return x + + +class RandomHorizontalFlip(nn.Layer): + + def __init__(self, prob): + super(RandomHorizontalFlip, self).__init__() + self.prob = prob + + def forward(self, x): + if paddle.rand([1]) < self.prob: + return x[:, :, :, ::-1] + else: + return x + + +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: + ratio = float(ratio) + bound = 1.0 + return (ratio * img1 + (1.0 - ratio) * img2).clip(0, bound) + + +def trunc_div(a, b): + ipt = paddle.divide(a, b) + sign_ipt = paddle.sign(ipt) + abs_ipt = paddle.abs(ipt) + abs_ipt = paddle.floor(abs_ipt) + out = paddle.multiply(sign_ipt, abs_ipt) + return out + + +def fmod(a, b): + return a - trunc_div(a, b) * b + + +def _rgb2hsv(img: Tensor) -> Tensor: + r, g, b = img.unbind(axis=-3) + + # Implementation is based on https://github.com/python-pillow/Pillow/blob/4174d4267616897df3746d315d5a2d0f82c656ee/ + # src/libImaging/Convert.c#L330 + maxc = paddle.max(img, axis=-3) + minc = paddle.min(img, axis=-3) + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. + # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc + + cr = maxc - minc + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + ones = paddle.ones_like(maxc) + s = cr / paddle.where(eqc, ones, maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. 
+ cr_divisor = paddle.where(eqc, ones, cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r).cast('float32') * (bc - gc) + hg = ((maxc == g) & (maxc != r)).cast('float32') * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)).cast('float32') * (4.0 + gc - rc) + h = hr + hg + hb + h = fmod((h / 6.0 + 1.0), paddle.to_tensor(1.0)) + return paddle.stack((h, s, maxc), axis=-3) + + +def _hsv2rgb(img: Tensor) -> Tensor: + h, s, v = img.unbind(axis=-3) + i = paddle.floor(h * 6.0) + f = (h * 6.0) - i + i = i.cast(dtype='int32') + + p = paddle.clip((v * (1.0 - s)), 0.0, 1.0) + q = paddle.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = paddle.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + + mask = i.unsqueeze(axis=-3) == paddle.arange(6).reshape([-1, 1, 1]) + + a1 = paddle.stack((v, q, p, p, t, v), axis=-3) + a2 = paddle.stack((t, v, v, q, p, p), axis=-3) + a3 = paddle.stack((p, p, t, v, v, q), axis=-3) + a4 = paddle.stack((a1, a2, a3), axis=-4) + + return paddle.einsum("...ijk, ...xijk -> ...xjk", mask.cast(dtype=img.dtype), a4) + + +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: + if brightness_factor < 0: + raise ValueError(f"brightness_factor ({brightness_factor}) is not non-negative.") + + return _blend(img, paddle.zeros_like(img), brightness_factor) + + +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: + if contrast_factor < 0: + raise ValueError(f"contrast_factor ({contrast_factor}) is not non-negative.") + + c = img.shape[1] + + if c == 3: + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + mean = paddle.mean(output, axis=(-3, -2, -1), keepdim=True) + + else: + mean = paddle.mean(img, axis=(-3, -2, -1), keepdim=True) + + return _blend(img, mean, contrast_factor) + + +def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError(f"hue_factor ({hue_factor}) is not in [-0.5, 0.5].") + + img = _rgb2hsv(img) + h, s, v = img.unbind(axis=-3) + h = fmod(h + hue_factor, paddle.to_tensor(1.0)) + img = paddle.stack((h, s, v), axis=-3) + img_hue_adj = _hsv2rgb(img) + return img_hue_adj + + +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: + if saturation_factor < 0: + raise ValueError(f"saturation_factor ({saturation_factor}) is not non-negative.") + + output = (0.2989 * img[:, 0:1, :, :] + 0.587 * img[:, 1:2, :, :] + 0.114 * img[:, 2:3, :, :]) + + return _blend(img, output, saturation_factor) + + +class ColorJitter(nn.Layer): + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): + super(ColorJitter, self).__init__() + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input(hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) + + def _check_input(self, value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError(f"If {name} is a single number, it must be non negative.") + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError(f"{name} values should be between {bound}") + else: + raise TypeError(f"{name} should be 
a single number or a list/tuple with length 2.") + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def get_params( + brightness: Optional[List[float]], + contrast: Optional[List[float]], + saturation: Optional[List[float]], + hue: Optional[List[float]], + ) -> Tuple[Tensor, Optional[float], Optional[float], Optional[float], Optional[float]]: + """Get the parameters for the randomized transform to be applied on image. + + Args: + brightness (tuple of float (min, max), optional): The range from which the brightness_factor is chosen + uniformly. Pass None to turn off the transformation. + contrast (tuple of float (min, max), optional): The range from which the contrast_factor is chosen + uniformly. Pass None to turn off the transformation. + saturation (tuple of float (min, max), optional): The range from which the saturation_factor is chosen + uniformly. Pass None to turn off the transformation. + hue (tuple of float (min, max), optional): The range from which the hue_factor is chosen uniformly. + Pass None to turn off the transformation. + + Returns: + tuple: The parameters used to apply the randomized transform + along with their random order. + """ + fn_idx = paddle.randperm(4) + + b = None if brightness is None else paddle.empty([1]).uniform_(brightness[0], brightness[1]) + c = None if contrast is None else paddle.empty([1]).uniform_(contrast[0], contrast[1]) + s = None if saturation is None else paddle.empty([1]).uniform_(saturation[0], saturation[1]) + h = None if hue is None else paddle.empty([1]).uniform_(hue[0], hue[1]) + + return fn_idx, b, c, s, h + + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Input image. + + Returns: + PIL Image or Tensor: Color jittered image. 
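+
+        Example (illustrative sketch; these tensor transforms assume a batched
+        NCHW image with values in [0, 1]):
+
+            jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
+            out = jitter(paddle.rand([1, 3, 224, 224]))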
+ """ + fn_idx, brightness_factor, contrast_factor, saturation_factor, hue_factor = self.get_params( + self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if fn_id == 0 and brightness_factor is not None: + img = adjust_brightness(img, brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + img = adjust_contrast(img, contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + img = adjust_saturation(img, saturation_factor) + elif fn_id == 3 and hue_factor is not None: + img = adjust_hue(img, hue_factor) + + return img + + def __repr__(self) -> str: + s = (f"{self.__class__.__name__}(" + f"brightness={self.brightness}" + f", contrast={self.contrast}" + f", saturation={self.saturation}" + f", hue={self.hue})") + return s + + +def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str, fill: Optional[List[float]]) -> Tensor: + + if img.shape[0] > 1: + # Apply same grid to a batch of images + grid = grid.expand([img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3]]) + + # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice + if fill is not None: + dummy = paddle.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype) + img = paddle.concat((img, dummy), axis=1) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + # Fill with required color + if fill is not None: + mask = img[:, -1:, :, :] # N * 1 * H * W + img = img[:, :-1, :, :] # N * C * H * W + mask = mask.expand_as(img) + len_fill = len(fill) if isinstance(fill, (tuple, list)) else 1 + fill_img = paddle.to_tensor(fill, dtype=img.dtype).reshape([1, len_fill, 1, 1]).expand_as(img) + if mode == "nearest": + mask = mask < 0.5 + img[mask] = fill_img[mask] + else: # 'bilinear' + img = img * mask + (1.0 - mask) * fill_img + return img + + +def _gen_affine_grid( + theta: Tensor, + w: int, + h: int, + ow: int, + oh: int, +) -> Tensor: + # https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/ + # AffineGridGenerator.cpp#L18 + # Difference with AffineGridGenerator is that: + # 1) we normalize grid values after applying theta + # 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate + + d = 0.5 + base_grid = paddle.empty([1, oh, ow, 3], dtype=theta.dtype) + x_grid = paddle.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, num=ow) + base_grid[..., 0] = (x_grid) + y_grid = paddle.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, num=oh).unsqueeze_(-1) + base_grid[..., 1] = (y_grid) + base_grid[..., 2] = 1.0 + rescaled_theta = theta.transpose([0, 2, 1]) / paddle.to_tensor([0.5 * w, 0.5 * h], dtype=theta.dtype) + output_grid = base_grid.reshape([1, oh * ow, 3]).bmm(rescaled_theta) + return output_grid.reshape([1, oh, ow, 2]) + + +def affine_impl(img: Tensor, + matrix: List[float], + interpolation: str = "nearest", + fill: Optional[List[float]] = None) -> Tensor: + theta = paddle.to_tensor(matrix, dtype=img.dtype).reshape([1, 2, 3]) + shape = img.shape + # grid will be generated on the same device as theta and img + grid = _gen_affine_grid(theta, w=shape[-1], h=shape[-2], ow=shape[-1], oh=shape[-2]) + return _apply_grid_transform(img, grid, interpolation, fill=fill) + + +def _get_inverse_affine_matrix(center: List[float], + angle: float, + translate: List[float], + scale: float, + shear: List[float], + inverted: bool = True) -> List[float]: + # Helper method to compute inverse matrix for affine transformation + + # Pillow 
requires inverse affine transformation matrix: + # Affine matrix is : M = T * C * RotateScaleShear * C^-1 + # + # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1] + # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1] + # RotateScaleShear is rotation with scale and shear matrix + # + # RotateScaleShear(a, s, (sx, sy)) = + # = R(a) * S(s) * SHy(sy) * SHx(sx) + # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ] + # [ s*sin(a + sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ] + # [ 0 , 0 , 1 ] + # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears: + # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0] + # [0, 1 ] [-tan(s), 1] + # + # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1 + + rot = math.radians(angle) + sx = math.radians(shear[0]) + sy = math.radians(shear[1]) + + cx, cy = center + tx, ty = translate + + # RSS without scaling + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) + + if inverted: + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] + # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) + # Apply center translation: C * RSS^-1 * C^-1 * T^-1 + matrix[2] += cx + matrix[5] += cy + else: + matrix = [a, b, 0.0, c, d, 0.0] + matrix = [x * scale for x in matrix] + # Apply inverse of center translation: RSS * C^-1 + matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) + matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) + # Apply translation and center : T * C * RSS * C^-1 + matrix[2] += cx + tx + matrix[5] += cy + ty + + return matrix + + +def affine( + img: Tensor, + angle: float, + translate: List[int], + scale: float, + shear: List[float], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Optional[List[float]] = None, + resample: Optional[int] = None, + fillcolor: Optional[List[float]] = None, + center: Optional[List[int]] = None, +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + img (PIL Image or Tensor): image to transform. + angle (number): rotation angle in degrees between -180 and 180, clockwise direction. + translate (sequence of integers): horizontal and vertical translations (post-rotation translation) + scale (float): overall scale + shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction. + If a sequence is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. 
``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number, optional): Pixel fill value for the area outside the transformed + image. If given a number, the value is used for all bands respectively. + + .. note:: + In torchscript mode single int/float value is not supported, please use a sequence + of length 1: ``[value, ]``. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation. Origin is the upper left corner. + Default is the center of the image. + + Returns: + PIL Image or Tensor: Transformed image. + """ + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if not isinstance(interpolation, InterpolationMode): + raise TypeError("Argument interpolation should be a InterpolationMode") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") + + if center is not None and not isinstance(center, (list, tuple)): + raise TypeError("Argument center should be a sequence") + center_f = [0.0, 0.0] + if center is not None: + _, height, width = img.shape[0], img.shape[1], img.shape[2] + # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
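+        # For example, with a 224x224 image, center=(112, 112) maps to
+        # center_f=(0.0, 0.0), i.e. the image center.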
+ center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] + + translate_f = [1.0 * t for t in translate] + matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) + return affine_impl(img, matrix=matrix, interpolation=interpolation.value, fill=fill) + + +def _interpolation_modes_from_int(i: int) -> InterpolationMode: + inverse_modes_mapping = { + 0: InterpolationMode.NEAREST, + 2: InterpolationMode.BILINEAR, + 3: InterpolationMode.BICUBIC, + 4: InterpolationMode.BOX, + 5: InterpolationMode.HAMMING, + 1: InterpolationMode.LANCZOS, + } + return inverse_modes_mapping[i] + + +def _check_sequence_input(x, name, req_sizes): + msg = req_sizes[0] if len(req_sizes) < 2 else " or ".join([str(s) for s in req_sizes]) + if not isinstance(x, Sequence): + raise TypeError(f"{name} should be a sequence of length {msg}.") + if len(x) not in req_sizes: + raise ValueError(f"{name} should be sequence of length {msg}.") + + +def _setup_angle(x, name, req_sizes=(2, )): + if isinstance(x, numbers.Number): + if x < 0: + raise ValueError(f"If {name} is a single number, it must be positive.") + x = [-x, x] + else: + _check_sequence_input(x, name, req_sizes) + + return [float(d) for d in x] + + +class RandomAffine(nn.Layer): + """Random affine transformation of the image keeping center invariant. + If the image is paddle Tensor, it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. + + Args: + degrees (sequence or number): Range of degrees to select from. + If degrees is a number instead of sequence like (min, max), the range of degrees + will be (-degrees, +degrees). Set to 0 to deactivate rotations. + translate (tuple, optional): tuple of maximum absolute fraction for horizontal + and vertical translations. For example translate=(a, b), then horizontal shift + is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is + randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. + scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is + randomly sampled from the range a <= scale <= b. Will keep original scale by default. + shear (sequence or number, optional): Range of degrees to select from. + If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) + will be applied. Else if shear is a sequence of 2 values a shear parallel to the x axis in the + range (shear[0], shear[1]) will be applied. Else if shear is a sequence of 4 values, + a x-axis shear in (shear[0], shear[1]) and y-axis shear in (shear[2], shear[3]) will be applied. + Will not apply shear by default. + interpolation (InterpolationMode): Desired interpolation enum defined by + :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. + If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. + For backward compatibility integer values (e.g. ``PIL.Image[.Resampling].NEAREST``) are still accepted, + but deprecated since 0.13 and will be removed in 0.15. Please use InterpolationMode enum. + fill (sequence or number): Pixel fill value for the area outside the transformed + image. Default is ``0``. If given a number, the value is used for all bands respectively. + fillcolor (sequence or number, optional): + .. warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``fill`` instead. + resample (int, optional): + .. 
warning:: + This parameter was deprecated in ``0.12`` and will be removed in ``0.14``. Please use ``interpolation`` + instead. + center (sequence, optional): Optional center of rotation, (x, y). Origin is the upper left corner. + Default is the center of the image. + + .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters + + """ + + def __init__( + self, + degrees, + translate=None, + scale=None, + shear=None, + interpolation=InterpolationMode.NEAREST, + fill=0, + fillcolor=None, + resample=None, + center=None, + ): + super(RandomAffine, self).__init__() + if resample is not None: + warnings.warn("The parameter 'resample' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'interpolation' instead.") + interpolation = _interpolation_modes_from_int(resample) + + # Backward compatibility with integer value + if isinstance(interpolation, int): + warnings.warn("Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. " + "Please use InterpolationMode enum.") + interpolation = _interpolation_modes_from_int(interpolation) + + if fillcolor is not None: + warnings.warn("The parameter 'fillcolor' is deprecated since 0.12 and will be removed in 0.14. " + "Please use 'fill' instead.") + fill = fillcolor + + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2, )) + + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2, )) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2, )) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.resample = self.interpolation = interpolation + + if fill is None: + fill = 0 + elif not isinstance(fill, (Sequence, numbers.Number)): + raise TypeError("Fill should be either a sequence or a number.") + + self.fillcolor = self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2, )) + + self.center = center + + @staticmethod + def get_params( + degrees: List[float], + translate: Optional[List[float]], + scale_ranges: Optional[List[float]], + shears: Optional[List[float]], + img_size: List[int], + ) -> Tuple[float, Tuple[int, int], float, Tuple[float, float]]: + """Get parameters for affine transformation + + Returns: + params to be passed to the affine transformation + """ + angle = float(paddle.empty([1]).uniform_(float(degrees[0]), float(degrees[1]))) + if translate is not None: + max_dx = float(translate[0] * img_size[0]) + max_dy = float(translate[1] * img_size[1]) + tx = int(float(paddle.empty([1]).uniform_(-max_dx, max_dx))) + ty = int(float(paddle.empty([1]).uniform_(-max_dy, max_dy))) + translations = (tx, ty) + else: + translations = (0, 0) + + if scale_ranges is not None: + scale = float(paddle.empty([1]).uniform_(scale_ranges[0], scale_ranges[1])) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if shears is not None: + shear_x = float(paddle.empty([1]).uniform_(shears[0], shears[1])) + if len(shears) == 4: + shear_y = float(paddle.empty([1]).uniform_(shears[2], shears[3])) + + shear = (shear_x, shear_y) + + return angle, translations, scale, shear + + def forward(self, img): + fill = self.fill + channels, height, width = img.shape[1], 
img.shape[2], img.shape[3]
+        if isinstance(fill, (int, float)):
+            fill = [float(fill)] * channels
+        else:
+            fill = [float(f) for f in fill]
+
+        img_size = [width, height]  # flip for keeping BC on get_params call
+
+        ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size)
+
+        return affine(img, *ret, interpolation=self.interpolation, fill=fill, center=self.center)
+
+    def __repr__(self) -> str:
+        s = f"{self.__class__.__name__}(degrees={self.degrees}"
+        s += f", translate={self.translate}" if self.translate is not None else ""
+        s += f", scale={self.scale}" if self.scale is not None else ""
+        s += f", shear={self.shear}" if self.shear is not None else ""
+        s += f", interpolation={self.interpolation.value}" if self.interpolation != InterpolationMode.NEAREST else ""
+        s += f", fill={self.fill}" if self.fill != 0 else ""
+        s += f", center={self.center}" if self.center is not None else ""
+        s += ")"
+
+        return s
diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/unet.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/unet.py
new file mode 100755
index 000000000..56f3ad61e
--- /dev/null
+++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/model/unet.py
@@ -0,0 +1,838 @@
+'''
+This code is rewritten by Paddle based on
+https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py
+'''
+import math
+from abc import abstractmethod
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from .nn import avg_pool_nd
+from .nn import checkpoint
+from .nn import conv_nd
+from .nn import linear
+from .nn import normalization
+from .nn import SiLU
+from .nn import timestep_embedding
+from .nn import zero_module
+
+
+class AttentionPool2d(nn.Layer):
+    """
+    Adapted from CLIP: https://github.com/openai/CLIP/blob/main/clip/model.py
+    """
+
+    def __init__(
+        self,
+        spacial_dim: int,
+        embed_dim: int,
+        num_heads_channels: int,
+        output_dim: int = None,
+    ):
+        super().__init__()
+        # self.positional_embedding = nn.Parameter(
+        #     th.randn(embed_dim, spacial_dim ** 2 + 1) / embed_dim ** 0.5
+        # )
+        # Equivalent to randn(...) / embed_dim ** 0.5, i.e. a normal init with std = embed_dim ** -0.5.
+        positional_embedding = self.create_parameter([embed_dim, spacial_dim**2 + 1],
+                                                      dtype='float32',
+                                                      default_initializer=nn.initializer.Normal(mean=0.0,
+                                                                                                std=embed_dim**-0.5))
+        self.add_parameter("positional_embedding", positional_embedding)
+        self.qkv_proj = conv_nd(1, embed_dim, 3 * embed_dim, 1)
+        self.c_proj = conv_nd(1, embed_dim, output_dim or embed_dim, 1)
+        self.num_heads = embed_dim // num_heads_channels
+        self.attention = QKVAttention(self.num_heads)
+
+    def forward(self, x):
+        b, c, *_spatial = x.shape
+        # x = x.reshape(b, c, -1)  # NC(HW)
+        x = paddle.reshape(x, [b, c, -1])
+        x = paddle.concat([x.mean(axis=-1, keepdim=True), x], axis=-1)  # NC(HW+1)
+        x = x + paddle.cast(self.positional_embedding[None, :, :], x.dtype)  # NC(HW+1)
+        x = self.qkv_proj(x)
+        x = self.attention(x)
+        x = self.c_proj(x)
+        return x[:, :, 0]
+
+
+class TimestepBlock(nn.Layer):
+    """
+    Any module where forward() takes timestep embeddings as a second argument.
+    """
+
+    @abstractmethod
+    def forward(self, x, emb):
+        """
+        Apply the module to `x` given `emb` timestep embeddings.
+        """
+
+
+class TimestepEmbedSequential(nn.Sequential, TimestepBlock):
+    """
+    A sequential module that passes timestep embeddings to the children that
+    support it as an extra input.
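+
+    Illustrative sketch (layer sizes are arbitrary): children that subclass
+    TimestepBlock receive the embedding, plain layers receive only the input.
+
+        block = TimestepEmbedSequential(
+            conv_nd(2, 3, 64, 3, padding=1),              # called as layer(x)
+            ResBlock(64, emb_channels=256, dropout=0.0),  # called as layer(x, emb)
+        )
+        h = block(x, emb)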
+ """ + + def forward(self, x, emb): + for layer in self: + if isinstance(layer, TimestepBlock): + x = layer(x, emb) + else: + x = layer(x) + return x + + +class Upsample(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + if use_conv: + self.conv = conv_nd(dims, self.channels, self.out_channels, 3, padding=1) + + def forward(self, x): + assert x.shape[1] == self.channels + if self.dims == 3: + x = F.interpolate(x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest") + else: + x = F.interpolate(x, scale_factor=2, mode="nearest") + if self.use_conv: + x = self.conv(x) + return x + + +class Downsample(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. + :param use_conv: a bool determining if a convolution is applied. + :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv, dims=2, out_channels=None): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.dims = dims + stride = 2 if dims != 3 else (1, 2, 2) + if use_conv: + self.op = conv_nd(dims, self.channels, self.out_channels, 3, stride=stride, padding=1) + else: + assert self.channels == self.out_channels + self.op = avg_pool_nd(dims, kernel_size=stride, stride=stride) + + def forward(self, x): + assert x.shape[1] == self.channels + return self.op(x) + + +class ResBlock(TimestepBlock): + """ + A residual block that can optionally change the number of channels. + + :param channels: the number of input channels. + :param emb_channels: the number of timestep embedding channels. + :param dropout: the rate of dropout. + :param out_channels: if specified, the number of out channels. + :param use_conv: if True and out_channels is specified, use a spatial + convolution instead of a smaller 1x1 convolution to change the + channels in the skip connection. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param use_checkpoint: if True, use gradient checkpointing on this module. + :param up: if True, use this block for upsampling. + :param down: if True, use this block for downsampling. 
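+
+    Shape sketch (illustrative): with channels=128, emb_channels=512 and
+    out_channels=256, an input `x` of shape [N, 128, H, W] together with `emb`
+    of shape [N, 512] produces an output of shape [N, 256, H, W].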
+ """ + + def __init__( + self, + channels, + emb_channels, + dropout, + out_channels=None, + use_conv=False, + use_scale_shift_norm=False, + dims=2, + use_checkpoint=False, + up=False, + down=False, + ): + super().__init__() + self.channels = channels + self.emb_channels = emb_channels + self.dropout = dropout + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_checkpoint = use_checkpoint + self.use_scale_shift_norm = use_scale_shift_norm + + self.in_layers = nn.Sequential( + normalization(channels), + SiLU(), + conv_nd(dims, channels, self.out_channels, 3, padding=1), + ) + + self.updown = up or down + + if up: + self.h_upd = Upsample(channels, False, dims) + self.x_upd = Upsample(channels, False, dims) + elif down: + self.h_upd = Downsample(channels, False, dims) + self.x_upd = Downsample(channels, False, dims) + else: + self.h_upd = self.x_upd = nn.Identity() + + self.emb_layers = nn.Sequential( + SiLU(), + linear( + emb_channels, + 2 * self.out_channels if use_scale_shift_norm else self.out_channels, + ), + ) + self.out_layers = nn.Sequential( + normalization(self.out_channels), + SiLU(), + nn.Dropout(p=dropout), + zero_module(conv_nd(dims, self.out_channels, self.out_channels, 3, padding=1)), + ) + + if self.out_channels == channels: + self.skip_connection = nn.Identity() + elif use_conv: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 3, padding=1) + else: + self.skip_connection = conv_nd(dims, channels, self.out_channels, 1) + + def forward(self, x, emb): + """ + Apply the block to a Tensor, conditioned on a timestep embedding. + + :param x: an [N x C x ...] Tensor of features. + :param emb: an [N x emb_channels] Tensor of timestep embeddings. + :return: an [N x C x ...] Tensor of outputs. + """ + return checkpoint(self._forward, (x, emb), self.parameters(), self.use_checkpoint) + + def _forward(self, x, emb): + if self.updown: + in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] + h = in_rest(x) + h = self.h_upd(h) + x = self.x_upd(x) + h = in_conv(h) + else: + h = self.in_layers(x) + emb_out = self.emb_layers(emb) + emb_out = paddle.cast(emb_out, h.dtype) + while len(emb_out.shape) < len(h.shape): + emb_out = emb_out[..., None] + if self.use_scale_shift_norm: + out_norm, out_rest = self.out_layers[0], self.out_layers[1:] + scale, shift = paddle.chunk(emb_out, 2, axis=1) + h = out_norm(h) * (1 + scale) + shift + h = out_rest(h) + else: + h = h + emb_out + h = self.out_layers(h) + return self.skip_connection(x) + h + + +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
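+
+    Shape sketch (illustrative): AttentionBlock(channels=256, num_head_channels=64)
+    uses 256 // 64 = 4 heads, attends over the flattened spatial positions, and
+    returns a tensor of the same shape as its [N, 256, H, W] input.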
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=-1, + use_checkpoint=False, + use_new_attention_order=False, + ): + super().__init__() + self.channels = channels + if num_head_channels == -1: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + self.use_checkpoint = use_checkpoint + self.norm = normalization(channels) + self.qkv = conv_nd(1, channels, channels * 3, 1) + if use_new_attention_order: + # split qkv before split heads + self.attention = QKVAttention(self.num_heads) + else: + # split heads before split qkv + self.attention = QKVAttentionLegacy(self.num_heads) + + self.proj_out = zero_module(conv_nd(1, channels, channels, 1)) + + def forward(self, x): + return checkpoint(self._forward, (x, ), self.parameters(), self.use_checkpoint) + + def _forward(self, x): + b, c, *spatial = x.shape + # x = x.reshape(b, c, -1) + x = paddle.reshape(x, [b, c, -1]) + qkv = self.qkv(self.norm(x)) + h = self.attention(qkv) + h = self.proj_out(h) + # return (x + h).reshape(b, c, *spatial) + return paddle.reshape(x + h, [b, c, *spatial]) + + +def count_flops_attn(model, _x, y): + """ + A counter for the `thop` package to count the operations in an + attention operation. + Meant to be used like: + macs, params = thop.profile( + model, + inputs=(inputs, timestamps), + custom_ops={QKVAttention: QKVAttention.count_flops}, + ) + """ + b, c, *spatial = y[0].shape + num_spatial = int(np.prod(spatial)) + # We perform two matmuls with the same number of ops. + # The first computes the weight matrix, the second computes + # the combination of the value vectors. + matmul_ops = 2 * b * (num_spatial**2) * c + model.total_ops += paddle.to_tensor([matmul_ops], dtype='float64') + + +class QKVAttentionLegacy(nn.Layer): + """ + A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. + """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + # q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) + q, k, v = paddle.reshape(qkv, [bs * self.n_heads, ch * 3, length]).split(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class QKVAttention(nn.Layer): + """ + A module which performs QKV attention and splits in a different order. + """ + + def __init__(self, n_heads): + super().__init__() + self.n_heads = n_heads + + def forward(self, qkv): + """ + Apply QKV attention. + + :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs. + :return: an [N x (H * C) x T] tensor after attention. 
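+
+        Shape sketch (illustrative): with n_heads=4, ch=64 and T=256, `qkv` has
+        shape [N, 3 * 4 * 64, 256] = [N, 768, 256] and the returned tensor has
+        shape [N, 256, 256].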
+ """ + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.chunk(3, axis=1) + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum( + "bct,bcs->bts", + (q * scale).view(bs * self.n_heads, ch, length), + (k * scale).view(bs * self.n_heads, ch, length), + ) # More stable with f16 than dividing afterwards + weight = paddle.cast(nn.functional.softmax(paddle.cast(weight, 'float32'), axis=-1), weight.dtype) + a = paddle.einsum("bts,bcs->bct", weight, v.reshape(bs * self.n_heads, ch, length)) + # return a.reshape(bs, -1, length) + return paddle.reshape(a, [bs, -1, length]) + + @staticmethod + def count_flops(model, _x, y): + return count_flops_attn(model, _x, y) + + +class UNetModel(nn.Layer): + """ + The full UNet model with attention and timestep embedding. + + :param in_channels: channels in the input Tensor. + :param model_channels: base channel count for the model. + :param out_channels: channels in the output Tensor. + :param num_res_blocks: number of residual blocks per downsample. + :param attention_resolutions: a collection of downsample rates at which + attention will take place. May be a set, list, or tuple. + For example, if this contains 4, then at 4x downsampling, attention + will be used. + :param dropout: the dropout probability. + :param channel_mult: channel multiplier for each level of the UNet. + :param conv_resample: if True, use learned convolutions for upsampling and + downsampling. + :param dims: determines if the signal is 1D, 2D, or 3D. + :param num_classes: if specified (as an int), then this model will be + class-conditional with `num_classes` classes. + :param use_checkpoint: use gradient checkpointing to reduce memory usage. + :param num_heads: the number of attention heads in each attention layer. + :param num_heads_channels: if specified, ignore num_heads and instead use + a fixed channel width per attention head. + :param num_heads_upsample: works with num_heads to set a different number + of heads for upsampling. Deprecated. + :param use_scale_shift_norm: use a FiLM-like conditioning mechanism. + :param resblock_updown: use residual blocks for up/downsampling. + :param use_new_attention_order: use a different attention pattern for potentially + increased efficiency. 
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + num_classes=None, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.image_size = image_size + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.num_classes = num_classes + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + if self.num_classes is not None: + self.label_emb = nn.Embedding(num_classes, time_embed_dim) + + ch = input_ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + + self.output_blocks = nn.LayerList([]) + for level, mult in list(enumerate(channel_mult))[::-1]: + for i in range(num_res_blocks + 1): + ich = input_block_chans.pop() + layers = [ + ResBlock( + ch + ich, + time_embed_dim, + dropout, + out_channels=int(model_channels * mult), + dims=dims, + 
use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(model_channels * mult) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads_upsample, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + if level and i == num_res_blocks: + out_ch = ch + layers.append( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + up=True, + ) if resblock_updown else Upsample(ch, conv_resample, dims=dims, out_channels=out_ch)) + ds //= 2 + self.output_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + + self.out = nn.Sequential( + normalization(ch), + SiLU(), + zero_module(conv_nd(dims, input_ch, out_channels, 3, padding=1)), + ) + + def forward(self, x, timesteps, y=None): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :param y: an [N] Tensor of labels, if class-conditional. + :return: an [N x C x ...] Tensor of outputs. + """ + assert (y is not None) == (self.num_classes + is not None), "must specify y if and only if the model is class-conditional" + + hs = [] + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + if self.num_classes is not None: + assert y.shape == (x.shape[0], ) + emb = emb + self.label_emb(y) + + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + hs.append(h) + h = self.middle_block(h, emb) + for module in self.output_blocks: + h = paddle.concat([h, hs.pop()], axis=1) + h = module(h, emb) + # h = paddle.cast(h, x.dtype) + return self.out(h) + + +class SuperResModel(UNetModel): + """ + A UNetModel that performs super-resolution. + + Expects an extra kwarg `low_res` to condition on a low-resolution image. + """ + + def __init__(self, image_size, in_channels, *args, **kwargs): + super().__init__(image_size, in_channels * 2, *args, **kwargs) + + def forward(self, x, timesteps, low_res=None, **kwargs): + _, _, new_height, new_width = x.shape + upsampled = F.interpolate(low_res, (new_height, new_width), mode="bilinear") + x = paddle.concat([x, upsampled], axis=1) + return super().forward(x, timesteps, **kwargs) + + +class EncoderUNetModel(nn.Layer): + """ + The half UNet model with attention and timestep embedding. + + For usage, see UNet. 
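+
+    Pooling sketch (illustrative): pool="adaptive" global-average-pools the final
+    feature map into an [N x out_channels] vector, while pool="attention" uses
+    AttentionPool2d and requires num_head_channels to be set. For example:
+
+        encoder = EncoderUNetModel(64, 3, 128, 1000, 2, (8, 4), pool="adaptive")
+        logits = encoder(x, timesteps)  # [N, 1000]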
+ """ + + def __init__( + self, + image_size, + in_channels, + model_channels, + out_channels, + num_res_blocks, + attention_resolutions, + dropout=0, + channel_mult=(1, 2, 4, 8), + conv_resample=True, + dims=2, + use_checkpoint=False, + use_fp16=False, + num_heads=1, + num_head_channels=-1, + num_heads_upsample=-1, + use_scale_shift_norm=False, + resblock_updown=False, + use_new_attention_order=False, + pool="adaptive", + ): + super().__init__() + + if num_heads_upsample == -1: + num_heads_upsample = num_heads + + self.in_channels = in_channels + self.model_channels = model_channels + self.out_channels = out_channels + self.num_res_blocks = num_res_blocks + self.attention_resolutions = attention_resolutions + self.dropout = dropout + self.channel_mult = channel_mult + self.conv_resample = conv_resample + self.use_checkpoint = use_checkpoint + self.dtype = paddle.float16 if use_fp16 else paddle.float32 + self.num_heads = num_heads + self.num_head_channels = num_head_channels + self.num_heads_upsample = num_heads_upsample + + time_embed_dim = model_channels * 4 + self.time_embed = nn.Sequential( + linear(model_channels, time_embed_dim), + SiLU(), + linear(time_embed_dim, time_embed_dim), + ) + + ch = int(channel_mult[0] * model_channels) + self.input_blocks = nn.LayerList([TimestepEmbedSequential(conv_nd(dims, in_channels, ch, 3, padding=1))]) + self._feature_size = ch + input_block_chans = [ch] + ds = 1 + for level, mult in enumerate(channel_mult): + for _ in range(num_res_blocks): + layers = [ + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=int(mult * model_channels), + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ) + ] + ch = int(mult * model_channels) + if ds in attention_resolutions: + layers.append( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + )) + self.input_blocks.append(TimestepEmbedSequential(*layers)) + self._feature_size += ch + input_block_chans.append(ch) + if level != len(channel_mult) - 1: + out_ch = ch + self.input_blocks.append( + TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + out_channels=out_ch, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + down=True, + ) if resblock_updown else Downsample(ch, conv_resample, dims=dims, out_channels=out_ch))) + ch = out_ch + input_block_chans.append(ch) + ds *= 2 + self._feature_size += ch + + self.middle_block = TimestepEmbedSequential( + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=num_head_channels, + use_new_attention_order=use_new_attention_order, + ), + ResBlock( + ch, + time_embed_dim, + dropout, + dims=dims, + use_checkpoint=use_checkpoint, + use_scale_shift_norm=use_scale_shift_norm, + ), + ) + self._feature_size += ch + self.pool = pool + if pool == "adaptive": + self.out = nn.Sequential( + normalization(ch), + SiLU(), + nn.AdaptiveAvgPool2D((1, 1)), + zero_module(conv_nd(dims, ch, out_channels, 1)), + nn.Flatten(), + ) + elif pool == "attention": + assert num_head_channels != -1 + self.out = nn.Sequential( + normalization(ch), + SiLU(), + AttentionPool2d((image_size // ds), ch, num_head_channels, out_channels), + ) + elif pool == "spatial": + self.out = nn.Sequential( + 
nn.Linear(self._feature_size, 2048), + nn.ReLU(), + nn.Linear(2048, self.out_channels), + ) + elif pool == "spatial_v2": + self.out = nn.Sequential( + nn.Linear(self._feature_size, 2048), + normalization(2048), + SiLU(), + nn.Linear(2048, self.out_channels), + ) + else: + raise NotImplementedError(f"Unexpected {pool} pooling") + + def forward(self, x, timesteps): + """ + Apply the model to an input batch. + + :param x: an [N x C x ...] Tensor of inputs. + :param timesteps: a 1-D batch of timesteps. + :return: an [N x K] Tensor of outputs. + """ + emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) + + results = [] + # h = x.type(self.dtype) + h = paddle.cast(x, self.dtype) + for module in self.input_blocks: + h = module(h, emb) + if self.pool.startswith("spatial"): + # results.append(h.type(x.dtype).mean(axis=(2, 3))) + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = self.middle_block(h, emb) + if self.pool.startswith("spatial"): + results.append(paddle.cast(h, x.dtype).mean(axis=(2, 3))) + h = paddle.concat(results, axis=-1) + return self.out(h) + else: + # h = h.type(x.dtype) + h = paddle.cast(h, x.dtype) + return self.out(h) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/default.yml b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/default.yml new file mode 100755 index 000000000..97c3c1b98 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/default.yml @@ -0,0 +1,47 @@ +text_prompts: + - A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation. + +init_image: + +width_height: [ 1280, 768] + +skip_steps: 10 +steps: 250 + +cut_ic_pow: 1 +init_scale: 1000 +clip_guidance_scale: 5000 + +tv_scale: 0 +range_scale: 150 +sat_scale: 0 +cutn_batches: 4 + +diffusion_model: 512x512_diffusion_uncond_finetune_008100 +use_secondary_model: True +diffusion_sampling_mode: ddim + +perlin_init: False +perlin_mode: mixed +seed: 445467575 +eta: 0.8 +clamp_grad: True +clamp_max: 0.05 + +randomize_class: True +clip_denoised: False +fuzzy_prompt: False +rand_mag: 0.05 + +cut_overview: "[12]*400+[4]*600" +cut_innercut: "[4]*400+[12]*600" +cut_icgray_p: "[0.2]*400+[0]*600" + +display_rate: 10 +n_batches: 1 +batch_size: 1 +batch_name: '' +clip_models: + - VIT + - RN50 + - RN101 diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/docstrings.yml b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/docstrings.yml new file mode 100755 index 000000000..702015e1c --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/resources/docstrings.yml @@ -0,0 +1,103 @@ +text_prompts: | + Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. 
+ Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. +init_image: | + Recall that in the image sequence above, the first image shown is just noise. If an init_image is provided, diffusion will replace the noise with the init_image as its starting state. To use an init_image, upload the image to the Colab instance or your Google Drive, and enter the full image path here. + If using an init_image, you may need to increase skip_steps to ~ 50% of total steps to retain the character of the init. See skip_steps above for further discussion. +width_height: | + Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + +skip_steps: | + Consider the chart shown here. Noise scheduling (denoise strength) starts very high and progressively gets lower and lower as diffusion steps progress. The noise levels in the first few steps are very high, so images change dramatically in early steps. + As DD moves along the curve, noise levels (and thus the amount an image changes per step) declines, and image coherence from one step to the next increases. + The first few steps of denoising are often so dramatic that some steps (maybe 10-15% of total) can be skipped without affecting the final image. You can experiment with this as a way to cut render times. + If you skip too many steps, however, the remaining noise may not be high enough to generate new content, and thus may not have ‘time left’ to finish an image satisfactorily. + Also, depending on your other settings, you may need to skip steps to prevent CLIP from overshooting your goal, resulting in ‘blown out’ colors (hyper saturated, solid white, or solid black regions) or otherwise poor image quality. Consider that the denoising process is at its strongest in the early steps, so skipping steps can sometimes mitigate other problems. + Lastly, if using an init_image, you will need to skip ~50% of the diffusion steps to retain the shapes in the original init image. + However, if you’re using an init_image, you can also adjust skip_steps up or down for creative reasons. With low skip_steps you can get a result "inspired by" the init_image which will retain the colors and rough layout and shapes but look quite different. With high skip_steps you can preserve most of the init_image contents and just do fine tuning of the texture. + +steps: | + When creating an image, the denoising curve is subdivided into steps for processing. Each step (or iteration) involves the AI looking at subsets of the image called ‘cuts’ and calculating the ‘direction’ the image should be guided to be more like the prompt. Then it adjusts the image with the help of the diffusion denoiser, and moves to the next step. + Increasing steps will provide more opportunities for the AI to adjust the image, and each adjustment will be smaller, and thus will yield a more precise, detailed image. Increasing steps comes at the expense of longer render times. 
Also, while increasing steps should generally increase image quality, there is a diminishing return on additional steps beyond 250 - 500 steps. However, some intricate images can take 1000, 2000, or more steps. It is really up to the user. + Just know that the render time is directly related to the number of steps, and many other parameters have a major impact on image quality, without costing additional time. + +cut_ic_pow: | + This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +init_scale: | + This controls how strongly CLIP will try to match the init_image provided. This is balanced against the clip_guidance_scale (CGS) above. Too much init scale, and the image won’t change much during diffusion. Too much CGS and the init image will be lost. +clip_guidance_scale: | + CGS is one of the most important parameters you will use. It tells DD how strongly you want CLIP to move toward your prompt each timestep. Higher is generally better, but if CGS is too strong it will overshoot the goal and distort the image. So a happy medium is needed, and it takes experience to learn how to adjust CGS. + Note that this parameter generally scales with image dimensions. In other words, if you increase your total dimensions by 50% (e.g. a change from 512 x 512 to 512 x 768), then to maintain the same effect on the image, you’d want to increase clip_guidance_scale from 5000 to 7500. + Of the basic settings, clip_guidance_scale, steps and skip_steps are the most important contributors to image quality, so learn them well. +tv_scale: | + Total variance denoising. Optional, set to zero to turn off. Controls ‘smoothness’ of final output. If used, tv_scale will try to smooth out your final image to reduce overall noise. If your image is too ‘crunchy’, increase tv_scale. TV denoising is good at preserving edges while smoothing away noise in flat regions. See https://en.wikipedia.org/wiki/Total_variation_denoising +range_scale: | + Optional, set to zero to turn off. Used for adjustment of color contrast. Lower range_scale will increase contrast. Very low numbers create a reduced color palette, resulting in more vibrant or poster-like images. Higher range_scale will reduce contrast, for more muted images. + +sat_scale: | + Saturation scale. Optional, set to zero to turn off. If used, sat_scale will help mitigate oversaturation. If your image is too saturated, increase sat_scale to reduce the saturation. +cutn_batches: | + Each iteration, the AI cuts the image into smaller pieces known as cuts, and compares each cut to the prompt to decide how to guide the next diffusion step. More cuts can generally lead to better images, since DD has more chances to fine-tune the image precision in each timestep. + Additional cuts are memory intensive, however, and if DD tries to evaluate too many cuts at once, it can run out of memory. You can use cutn_batches to increase cuts per timestep without increasing memory usage. + At the default settings, DD is scheduled to do 16 cuts per timestep. If cutn_batches is set to 1, there will indeed only be 16 cuts total per timestep. 
+ However, if cutn_batches is increased to 4, DD will do 64 cuts total in each timestep, divided into 4 sequential batches of 16 cuts each. Because the cuts are being evaluated only 16 at a time, DD uses the memory required for only 16 cuts, but gives you the quality benefit of 64 cuts. The tradeoff, of course, is that this will take ~4 times as long to render each image. + So, (scheduled cuts) x (cutn_batches) = (total cuts per timestep). Increasing cutn_batches will increase render times, however, as the work is being done sequentially. DD’s default cut schedule is a good place to start, but the cut schedule can be adjusted in the Cutn Scheduling section, explained below. + +diffusion_model: Diffusion_model of choice. + +use_secondary_model: | + Option to use a secondary purpose-made diffusion model to clean up interim diffusion images for CLIP evaluation. If this option is turned off, DD will use the regular (large) diffusion model. Using the secondary model is faster - one user reported a 50% improvement in render speed! However, the secondary model is much smaller, and may reduce image quality and detail. I suggest you experiment with this. + +diffusion_sampling_mode: | + Two alternate diffusion denoising algorithms. ddim has been around longer, and is more established and tested. plms is a newly added alternate method that promises good diffusion results in fewer steps, but has not been as fully tested and may have side effects. This new plms mode is actively being researched in the #settings-and-techniques channel in the DD Discord. + +perlin_init: | + Normally, DD will use an image filled with random noise as a starting point for the diffusion curve. If perlin_init is selected, DD will instead use a Perlin noise model as an initial state. Perlin has very interesting characteristics, distinct from random noise, so it’s worth experimenting with this for your projects. Beyond perlin, you can, of course, generate your own noise images (such as with GIMP, etc) and use them as an init_image (without skipping steps). + Choosing perlin_init does not affect the actual diffusion process, just the starting point for the diffusion. Please note that selecting a perlin_init will replace and override any init_image you may have specified. Further, because the 2D, 3D and video animation systems all rely on the init_image system, if you enable Perlin while using animation modes, the perlin_init will jump in front of any previous image or video input, and DD will NOT give you the expected sequence of coherent images. All of that said, using Perlin and animation modes together do make a very colorful rainbow effect, which can be used creatively. + +perlin_mode: | + sets type of Perlin noise: colored, gray, or a mix of both, giving you additional options for noise types. Experiment to see what these do in your projects. +seed: | + Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed. This is useful if you like a particular result and would like to run more iterations that will be similar. + After each run, the actual seed value used will be reported in the parameters report, and can be reused if desired by entering seed # here. If a specific numerical seed is used repeatedly, the resulting images will be quite similar but not identical. 
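A rough sketch of the arithmetic above, (scheduled cuts) x (cutn_batches) = (total cuts per timestep), using the schedule strings from default.yml and the same eval-based indexing as the runner.py added alongside this file (the timestep value is only illustrative):

```python
cut_overview = eval("[12]*400+[4]*600")   # 1000-entry schedules, one entry per timestep
cut_innercut = eval("[4]*400+[12]*600")
cutn_batches = 4

t = 900                                   # diffusion timestep, counted down from 1000
scheduled = cut_overview[1000 - t] + cut_innercut[1000 - t]   # cuts evaluated per batch
total = scheduled * cutn_batches          # total cuts per timestep
print(scheduled, total)                   # 16 64
```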
+eta: | + eta (greek letter η) is a diffusion model variable that mixes in a random amount of scaled noise into each timestep. 0 is no noise, 1.0 is more noise. As with most DD parameters, you can go below zero for eta, but it may give you unpredictable results. + The steps parameter has a close relationship with the eta parameter. If you set eta to 0, then you can get decent output with only 50-75 steps. Setting eta to 1.0 favors higher step counts, ideally around 250 and up. eta has a subtle, unpredictable effect on image, so you’ll need to experiment to see how this affects your projects. +clamp_grad: | + As I understand it, clamp_grad is an internal limiter that stops DD from producing extreme results. Try your images with and without clamp_grad. If the image changes drastically with clamp_grad turned off, it probably means your clip_guidance_scale is too high and should be reduced. +clamp_max: | + Sets the value of the clamp_grad limitation. Default is 0.05, providing for smoother, more muted coloration in images, but setting higher values (0.15-0.3) can provide interesting contrast and vibrancy. + +randomize_class: +clip_denoised: False +fuzzy_prompt: | + Controls whether to add multiple noisy prompts to the prompt losses. If True, can increase variability of image output. Experiment with this. +rand_mag: | + Affects only the fuzzy_prompt. Controls the magnitude of the random noise added by fuzzy_prompt. + +cut_overview: The schedule of overview cuts +cut_innercut: The schedule of inner cuts +cut_icgray_p: This sets the size of the border used for inner cuts. High cut_ic_pow values have larger borders, and therefore the cuts themselves will be smaller and provide finer details. If you have too many or too-small inner cuts, you may lose overall image coherency and/or it may cause an undesirable ‘mosaic’ effect. Low cut_ic_pow values will allow the inner cuts to be larger, helping image coherency while still helping with some details. + +display_rate: | + During a diffusion run, you can monitor the progress of each image being created with this variable. If display_rate is set to 50, DD will show you the in-progress image every 50 timesteps. Setting this to a lower value, like 5 or 10, is a good way to get an early peek at where your image is heading. If you don’t like the progression, just interrupt execution, change some settings, and re-run. If you are planning a long, unmonitored batch, it’s better to set display_rate equal to steps, because displaying interim images does slow Colab down slightly. +n_batches: | + This variable sets the number of still images you want DD to create. If you are using an animation mode (see below for details) DD will ignore n_batches and create a single set of animated frames based on the animation settings. +batch_name: | + The name of the batch, the batch id will be named as "discoart-[batch_name]-seed". To avoid your artworks be overridden by other users, please use a unique name. +clip_models: | + CLIP Model selectors. ViT-B/32, ViT-B/16, ViT-L/14, RN101, RN50, RN50x4, RN50x16, RN50x64. + These various CLIP models are available for you to use during image generation. Models have different styles or ‘flavors,’ so look around. + You can mix in multiple models as well for different results. However, keep in mind that some models are extremely memory-hungry, and turning on additional models will take additional memory and may cause a crash. 
+ The rough order of speed/mem usage is (smallest/fastest to largest/slowest): + ViT-B/32 + RN50 + RN101 + ViT-B/16 + RN50x4 + RN50x16 + RN50x64 + ViT-L/14 + For RN50x64 & ViTL14 you may need to use fewer cuts, depending on your VRAM. diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/runner.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/runner.py new file mode 100755 index 000000000..7013c945a --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/reverse_diffusion/runner.py @@ -0,0 +1,285 @@ +''' +This code is rewritten by Paddle based on Jina-ai/discoart. +https://github.com/jina-ai/discoart/blob/main/discoart/runner.py +''' +import gc +import os +import random +from threading import Thread + +import disco_diffusion_clip_rn101.clip.clip as clip +import numpy as np +import paddle +import paddle.vision.transforms as T +import paddle_lpips as lpips +from docarray import Document +from docarray import DocumentArray +from IPython import display +from ipywidgets import Output +from PIL import Image + +from .helper import logger +from .helper import parse_prompt +from .model.losses import range_loss +from .model.losses import spherical_dist_loss +from .model.losses import tv_loss +from .model.make_cutouts import MakeCutoutsDango +from .model.sec_diff import alpha_sigma_to_t +from .model.sec_diff import SecondaryDiffusionImageNet2 +from .model.transforms import Normalize + + +def do_run(args, models) -> 'DocumentArray': + logger.info('preparing models...') + model, diffusion, clip_models, secondary_model = models + normalize = Normalize( + mean=[0.48145466, 0.4578275, 0.40821073], + std=[0.26862954, 0.26130258, 0.27577711], + ) + lpips_model = lpips.LPIPS(net='vgg') + for parameter in lpips_model.parameters(): + parameter.stop_gradient = True + side_x = (args.width_height[0] // 64) * 64 + side_y = (args.width_height[1] // 64) * 64 + cut_overview = eval(args.cut_overview) + cut_innercut = eval(args.cut_innercut) + cut_icgray_p = eval(args.cut_icgray_p) + + from .model.perlin_noises import create_perlin_noise, regen_perlin + + seed = args.seed + + skip_steps = args.skip_steps + + loss_values = [] + + if seed is not None: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + model_stats = [] + for clip_model in clip_models: + model_stat = { + 'clip_model': None, + 'target_embeds': [], + 'make_cutouts': None, + 'weights': [], + } + model_stat['clip_model'] = clip_model + + if isinstance(args.text_prompts, str): + args.text_prompts = [args.text_prompts] + + for prompt in args.text_prompts: + txt, weight = parse_prompt(prompt) + txt = clip_model.encode_text(clip.tokenize(prompt)) + if args.fuzzy_prompt: + for i in range(25): + model_stat['target_embeds'].append((txt + paddle.randn(txt.shape) * args.rand_mag).clip(0, 1)) + model_stat['weights'].append(weight) + else: + model_stat['target_embeds'].append(txt) + model_stat['weights'].append(weight) + + model_stat['target_embeds'] = paddle.concat(model_stat['target_embeds']) + model_stat['weights'] = paddle.to_tensor(model_stat['weights']) + if model_stat['weights'].sum().abs() < 1e-3: + raise RuntimeError('The weights must not sum to 0.') + model_stat['weights'] /= model_stat['weights'].sum().abs() + model_stats.append(model_stat) + + init = None + if args.init_image: + d = Document(uri=args.init_image).load_uri_to_image_tensor(side_x, side_y) + init = T.to_tensor(d.tensor).unsqueeze(0) * 2 - 1 + + if args.perlin_init: + if args.perlin_mode == 'color': + 
init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, False, side_y, side_x) + elif args.perlin_mode == 'gray': + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, True, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + else: + init = create_perlin_noise([1.5**-i * 0.5 for i in range(12)], 1, 1, False, side_y, side_x) + init2 = create_perlin_noise([1.5**-i * 0.5 for i in range(8)], 4, 4, True, side_y, side_x) + init = (T.to_tensor(init).add(T.to_tensor(init2)).divide(paddle.to_tensor(2.0)).unsqueeze(0) * 2 - 1) + del init2 + + cur_t = None + + def cond_fn(x, t, y=None): + x_is_NaN = False + n = x.shape[0] + if secondary_model: + alpha = paddle.to_tensor(diffusion.sqrt_alphas_cumprod[cur_t], dtype='float32') + sigma = paddle.to_tensor(diffusion.sqrt_one_minus_alphas_cumprod[cur_t], dtype='float32') + cosine_t = alpha_sigma_to_t(alpha, sigma) + x = paddle.to_tensor(x.detach(), dtype='float32') + x.stop_gradient = False + cosine_t = paddle.tile(paddle.to_tensor(cosine_t.detach().cpu().numpy()), [n]) + cosine_t.stop_gradient = False + out = secondary_model(x, cosine_t).pred + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + else: + t = paddle.ones([n], dtype='int64') * cur_t + out = diffusion.p_mean_variance(model, x, t, clip_denoised=False, model_kwargs={'y': y}) + fac = diffusion.sqrt_one_minus_alphas_cumprod[cur_t] + x_in_d = out['pred_xstart'] * fac + x * (1 - fac) + x_in = x_in_d.detach() + x_in.stop_gradient = False + x_in_grad = paddle.zeros_like(x_in, dtype='float32') + for model_stat in model_stats: + for i in range(args.cutn_batches): + t_int = (int(t.item()) + 1) # errors on last step without +1, need to find source + # when using SLIP Base model the dimensions need to be hard coded to avoid AttributeError: 'VisionTransformer' object has no attribute 'input_resolution' + try: + input_resolution = model_stat['clip_model'].visual.input_resolution + except: + input_resolution = 224 + + cuts = MakeCutoutsDango( + input_resolution, + Overview=cut_overview[1000 - t_int], + InnerCrop=cut_innercut[1000 - t_int], + IC_Size_Pow=args.cut_ic_pow, + IC_Grey_P=cut_icgray_p[1000 - t_int], + ) + clip_in = normalize(cuts(x_in.add(paddle.to_tensor(1.0)).divide(paddle.to_tensor(2.0)))) + image_embeds = (model_stat['clip_model'].encode_image(clip_in)) + + dists = spherical_dist_loss( + image_embeds.unsqueeze(1), + model_stat['target_embeds'].unsqueeze(0), + ) + + dists = dists.reshape([ + cut_overview[1000 - t_int] + cut_innercut[1000 - t_int], + n, + -1, + ]) + losses = dists.multiply(model_stat['weights']).sum(2).mean(0) + loss_values.append(losses.sum().item()) # log loss, probably shouldn't do per cutn_batch + + x_in_grad += (paddle.grad(losses.sum() * args.clip_guidance_scale, x_in)[0] / args.cutn_batches) + tv_losses = tv_loss(x_in) + range_losses = range_loss(x_in) + sat_losses = paddle.abs(x_in - x_in.clip(min=-1, max=1)).mean() + loss = (tv_losses.sum() * args.tv_scale + range_losses.sum() * args.range_scale + + sat_losses.sum() * args.sat_scale) + if init is not None and args.init_scale: + init_losses = lpips_model(x_in, init) + loss = loss + init_losses.sum() * args.init_scale + x_in_grad += paddle.grad(loss, x_in)[0] + if not paddle.isnan(x_in_grad).any(): 
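+            # Back-propagate the accumulated guidance gradient (CLIP distance, TV, range,
+            # saturation and optional LPIPS init losses w.r.t. x_in) through x_in_d to the
+            # noisy sample x; clamp_grad below rescales its RMS magnitude to at most clamp_max.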
+ grad = -paddle.grad(x_in_d, x, x_in_grad)[0] + else: + x_is_NaN = True + grad = paddle.zeros_like(x) + if args.clamp_grad and not x_is_NaN: + magnitude = grad.square().mean().sqrt() + return (grad * magnitude.clip(max=args.clamp_max) / magnitude) + return grad + + if args.diffusion_sampling_mode == 'ddim': + sample_fn = diffusion.ddim_sample_loop_progressive + else: + sample_fn = diffusion.plms_sample_loop_progressive + + logger.info('creating artwork...') + + image_display = Output() + da_batches = DocumentArray() + + for _nb in range(args.n_batches): + display.clear_output(wait=True) + display.display(args.name_docarray, image_display) + gc.collect() + paddle.device.cuda.empty_cache() + + d = Document(tags=vars(args)) + da_batches.append(d) + + cur_t = diffusion.num_timesteps - skip_steps - 1 + + if args.perlin_init: + init = regen_perlin(args.perlin_mode, side_y, side_x, args.batch_size) + + if args.diffusion_sampling_mode == 'ddim': + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + eta=args.eta, + ) + else: + samples = sample_fn( + model, + (args.batch_size, 3, side_y, side_x), + clip_denoised=args.clip_denoised, + model_kwargs={}, + cond_fn=cond_fn, + progress=True, + skip_timesteps=skip_steps, + init_image=init, + randomize_class=args.randomize_class, + order=2, + ) + + threads = [] + for j, sample in enumerate(samples): + cur_t -= 1 + with image_display: + if j % args.display_rate == 0 or cur_t == -1: + for _, image in enumerate(sample['pred_xstart']): + image = (image + 1) / 2 + image = image.clip(0, 1).squeeze().transpose([1, 2, 0]).numpy() * 255 + image = np.uint8(image) + image = Image.fromarray(image) + + image.save(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb))) + c = Document(tags={'cur_t': cur_t}) + c.load_pil_image_to_datauri(image) + d.chunks.append(c) + display.clear_output(wait=True) + display.display(display.Image(os.path.join(args.output_dir, 'progress-{}.png'.format(_nb)))) + d.chunks.plot_image_sprites(os.path.join(args.output_dir, + f'{args.name_docarray}-progress-{_nb}.png'), + show_index=True) + t = Thread( + target=_silent_push, + args=( + da_batches, + args.name_docarray, + ), + ) + threads.append(t) + t.start() + + if cur_t == -1: + d.load_pil_image_to_datauri(image) + + for t in threads: + t.join() + display.clear_output(wait=True) + logger.info(f'done! 
{args.name_docarray}') + da_batches.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + return da_batches + + +def _silent_push(da_batches: DocumentArray, name: str) -> None: + try: + da_batches.push(name) + except Exception as ex: + logger.debug(f'push failed: {ex}') From c52c4ed8f1a89faf7deae83fc72f453291b45cc1 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 19 Aug 2022 13:22:30 +0800 Subject: [PATCH 034/117] Add ERNIE 3.0 Zeus --- .../text/text_generation/ernie_zeus/README.md | 297 ++++++++++++ .../text/text_generation/ernie_zeus/module.py | 451 ++++++++++++++++++ .../ernie_zeus/requirements.txt | 1 + 3 files changed, 749 insertions(+) create mode 100644 modules/text/text_generation/ernie_zeus/README.md create mode 100644 modules/text/text_generation/ernie_zeus/module.py create mode 100644 modules/text/text_generation/ernie_zeus/requirements.txt diff --git a/modules/text/text_generation/ernie_zeus/README.md b/modules/text/text_generation/ernie_zeus/README.md new file mode 100644 index 000000000..920d55293 --- /dev/null +++ b/modules/text/text_generation/ernie_zeus/README.md @@ -0,0 +1,297 @@ +# ernie_zeus + +|模型名称|ernie_zeus| +| :--- | :---: | +|类别|文本-文本生成| +|网络|-| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|-| +|最新更新日期|2022-08-16| +|数据指标|-| + +## 一、模型基本信息 +### 应用效果展示 +- 作文创作: + - 作文标题:诚以养德,信以修身 + + - 作文:翻开我的书橱,展现在眼前的就是《曾国藩家书》。每当读起这些充满哲理的内容时,心里总会不禁佩服他。他虽出生于官宦之家,但并没有因此而骄傲自大,从小养成了平淡做人、踏实肯干的好品质,最后更赢得了属下和朋友们对他的一致认同和赞赏。由此可见,只要平时注意锻炼自己,处事脚踏实地,定能收获一番丰硕的成果!记得有句话叫“以诚待人”。我觉得曾国藩就是始终把做到真诚与诚信作为修身立业的准则和美德。 + +- 文案创作: + - 产品描述:芍药香氛的沐浴乳 + + - 文案:使用多种纯天然草本植物精华,泡沫细腻绵密,丰富的维他命及矿物质滋养皮肤。成分温和安全,适合干性、中性肌肤或敏感性肌肤使用! + +### 模型介绍 +ERNIE 3.0 Zeus 是 ERNIE 3.0 系列模型的最新升级。其除了对无标注数据和知识图谱的学习之外,还通过持续学习对百余种不同形式的任务数据学习。实现了任务知识增强,显著提升了模型的零样本/小样本学习能力。 + +更多详情参考 [文心大模型官网](https://wenxin.baidu.com/wenxin) 及 [ERNIE 3.0 Zeus 项目主页](https://wenxin.baidu.com/wenxin/modelbasedetail/ernie3_zeus)。 + +## 二、安装 +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ernie_zeus + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +- ### 3. 使用申请(可选) + - 请前往 [文心旸谷社区](https://wenxin.baidu.com/moduleApi/key) 申请使用本模型所需的 API key 和 Secret Key。 + + +## 三、模型 API 预测 +- ### 1. 命令行预测 + + - ```bash + # 作文创作 + $ hub run ernie_zeus \ + --task composition_generation \ + --text '诚以养德,信以修身' + ``` + + - **参数** + - --task(str): 指定任务名称,与 API 名称保持一直 + - --text(str): 根据不同的任务输入所需的文本。 + - 其他参数请参考后续 API 章节。 + +- ### 2. 预测代码示例 + + - ```python + import paddlehub as hub + + # 加载模型 + model = hub.Module(name='ernie_zeus') + + # 作文创作 + result = model.composition_generation( + text='诚以养德,信以修身' + ) + + print(result) + ``` + +- ### 3. 
API + - ```python + def __init__( + api_key: str = '', + secret_key: str = '' + ) -> None + ``` + + - 初始化 API + + - **参数** + + - api_key(str): API Key。(可选) + - secret_key(str): Secret Key。(可选) + + - ```python + def custom_generation( + text: str, + min_dec_len: int = 1, + seq_len: int = 128, + topp: float = 1.0, + penalty_score: float = 1.0, + stop_token: str = '', + task_prompt: str = '', + penalty_text: str = '', + choice_text: str = '', + is_unidirectional: bool = False, + min_dec_penalty_text: str = '', + logits_bias: int = -10000, + mask_type: str = 'word', + api_key: str = '', + secret_key: str = '' + ) -> str + ``` + - 自定义文本生成 API + + - **参数** + - text(srt): 模型的输入文本, 为 prompt 形式的输入。文本长度 [1, 1000]。注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + - stop_token(str): 预测结果解析时使用的结束字符串, 碰到对应字符串则直接截断并返回。可以通过设置该值, 过滤掉 few-shot 等场景下模型重复的 cases。 + - task_prompt(str): 指定预置的任务模板, 效果更好。 + PARAGRAPH: 引导模型生成一段文章; SENT: 引导模型生成一句话; ENTITY: 引导模型生成词组; + Summarization: 摘要; MT: 翻译; Text2Annotation: 抽取; Correction: 纠错; + QA_MRC: 阅读理解; Dialogue: 对话; QA_Closed_book: 闭卷问答; QA_Multi_Choice: 多选问答; + QuestionGeneration: 问题生成; Paraphrasing: 复述; NLI: 文本蕴含识别; SemanticMatching: 匹配; + Text2SQL: 文本描述转SQL; TextClassification: 文本分类; SentimentClassification: 情感分析; + zuowen: 写作文; adtext: 写文案; couplet: 对对联; novel: 写小说; cloze: 文本补全; Misc: 其它任务。 + - penalty_text(str): 模型会惩罚该字符串中的 token。通过设置该值, 可以减少某些冗余与异常字符的生成。 + - choice_text(str): 模型只能生成该字符串中的 token 的组合。通过设置该值, 可以对某些抽取式任务进行定向调优。 + - is_unidirectional(bool): False 表示模型为双向生成, True 表示模型为单向生成。建议续写与 few-shot 等通用场景建议采用单向生成方式, 而完型填空等任务相关场景建议采用双向生成方式。 + - min_dec_penalty_text(str): 与最小生成长度搭配使用, 可以在 min_dec_len 步前不让模型生成该字符串中的 tokens。 + - logits_bias(int): 配合 penalty_text 使用, 对给定的 penalty_text 中的 token 增加一个 logits_bias, 可以通过设置该值屏蔽某些 token 生成的概率。 + - mask_type(str): 设置该值可以控制模型生成粒度。可选参数为 word, sentence, paragraph。 + + - **返回** + - text(str): 生成的文本。 + + - ```python + def text_cloze( + text: str, + min_dec_len: int = 1, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.0 + ) -> str + ``` + + - 完形填空 API + + - **参数** + - text(str): 文字段落。使用 [MASK] 标记待补全文字。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 补全词语 + + - ```python + def composition_generation( + text: str, + min_dec_len: int = 128, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2 + ) -> str + ``` + - 作文创作 API + + - **参数** + - text(str): 作文题目。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 
token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 作文内容。 + + - ```python + def answer_generation( + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2 + ) -> str + ``` + - 自由问答 API + + - **参数** + - text(str): 问题内容。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 问题答案。 + + + - ```python + def couplet_continuation( + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.0 + ) -> str + ``` + - 对联续写 API + + - **参数** + - text(str): 对联上联。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 对联下联。 + + - ```python + def copywriting_generation( + text: str, + min_dec_len: int = 32, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2 + ) -> str + ``` + - 文案创作 API + + - **参数** + - text(str): 产品描述。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 产品文案。 + + - ```python + def novel_continuation( + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2 + ) -> str + ``` + - 小说续写 API + + - **参数** + - text(str): 小说上文。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 小说下文。 + + - ```python + def text_summarization( + text: str, + min_dec_len: int = 4, + seq_len: int = 512, + topp: float = 0.0, + penalty_score: float = 1.0 + ) -> str + ``` + - 文本摘要 API + + - **参数** + - text(str): 文本段落。 + - min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + - seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + - topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + - penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + + - **返回** + - text(str): 段落摘要。 +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install ernie_zeus == 1.0.0 + ``` \ No newline at end of file diff --git 
a/modules/text/text_generation/ernie_zeus/module.py b/modules/text/text_generation/ernie_zeus/module.py new file mode 100644 index 000000000..169c6e811 --- /dev/null +++ b/modules/text/text_generation/ernie_zeus/module.py @@ -0,0 +1,451 @@ +import json +import argparse + +import requests +from paddlehub.module.module import moduleinfo, runnable + + +def get_access_token(ak: str = '', sk: str = '') -> str: + ''' + Get Access Token + + Params: + ak(str): API Key + sk(str): Secret Key + + Return: + access_token(str): Access Token + ''' + url = 'https://wenxin.baidu.com/younger/portal/api/oauth/token' + headers = { + 'Content-Type': 'application/x-www-form-urlencoded' + } + datas = { + 'grant_type': 'client_credentials', + 'client_id': ak if ak != '' else 'G26BfAOLpGIRBN5XrOV2eyPA25CE01lE', + 'client_secret': sk if sk != '' else 'txLZOWIjEqXYMU3lSm05ViW4p9DWGOWs' + } + + responses = requests.post(url, datas, headers=headers) + + assert responses.status_code == 200, f"Network Error {responses.status_code}." + + results = json.loads(responses.text) + + assert results['msg'] == 'success', f"Error message: '{results['msg']}'. Please check the ak and sk." + + return results['data'] + + +@moduleinfo( + name='ernie_zeus', + type='nlp/text_generation', + author='paddlepaddle', + author_email='', + summary='ernie_zeus', + version='1.0.0' +) +class ERNIEZeus: + def __init__(self, ak: str = '', sk: str = '') -> None: + self.access_token = get_access_token(ak, sk) + + def custom_generation(self, + text: str, + min_dec_len: int = 1, + seq_len: int = 128, + topp: float = 1.0, + penalty_score: float = 1.0, + stop_token: str = '', + task_prompt: str = '', + penalty_text: str = '', + choice_text: str = '', + is_unidirectional: bool = False, + min_dec_penalty_text: str = '', + logits_bias: int = -10000, + mask_type: str = 'word') -> str: + ''' + ERNIE 3.0 Zeus 自定义接口 + + Params: + text(srt): 模型的输入文本, 为 prompt 形式的输入。文本长度 [1, 1000]。注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512。 + min_dec_len(int): 输出结果的最小长度, 避免因模型生成 END 或者遇到用户指定的 stop_token 而生成长度过短的情况,与 seq_len 结合使用来设置生成文本的长度范围 [1, seq_len]。 + seq_len(int): 输出结果的最大长度, 因模型生成 END 或者遇到用户指定的 stop_token, 实际返回结果可能会小于这个长度, 与 min_dec_len 结合使用来控制生成文本的长度范围 [1, 1000]。(注: ERNIE 3.0-1.5B 模型取值范围 ≤ 512) + topp(float): 影响输出文本的多样性, 取值越大, 生成文本的多样性越强。取值范围 [0.0, 1.0]。 + penalty_score(float): 通过对已生成的 token 增加惩罚, 减少重复生成的现象。值越大表示惩罚越大。取值范围 [1.0, 2.0]。 + stop_token(str): 预测结果解析时使用的结束字符串, 碰到对应字符串则直接截断并返回。可以通过设置该值, 过滤掉 few-shot 等场景下模型重复的 cases。 + task_prompt(str): 指定预置的任务模板, 效果更好。 + PARAGRAPH: 引导模型生成一段文章; SENT: 引导模型生成一句话; ENTITY: 引导模型生成词组; + Summarization: 摘要; MT: 翻译; Text2Annotation: 抽取; Correction: 纠错; + QA_MRC: 阅读理解; Dialogue: 对话; QA_Closed_book: 闭卷问答; QA_Multi_Choice: 多选问答; + QuestionGeneration: 问题生成; Paraphrasing: 复述; NLI: 文本蕴含识别; SemanticMatching: 匹配; + Text2SQL: 文本描述转SQL; TextClassification: 文本分类; SentimentClassification: 情感分析; + zuowen: 写作文; adtext: 写文案; couplet: 对对联; novel: 写小说; cloze: 文本补全; Misc: 其它任务。 + penalty_text(str): 模型会惩罚该字符串中的 token。通过设置该值, 可以减少某些冗余与异常字符的生成。 + choice_text(str): 模型只能生成该字符串中的 token 的组合。通过设置该值, 可以对某些抽取式任务进行定向调优。 + is_unidirectional(bool): False 表示模型为双向生成, True 表示模型为单向生成。建议续写与 few-shot 等通用场景建议采用单向生成方式, 而完型填空等任务相关场景建议采用双向生成方式。 + min_dec_penalty_text(str): 与最小生成长度搭配使用, 可以在 min_dec_len 步前不让模型生成该字符串中的 tokens。 + logits_bias(int): 配合 penalty_text 使用, 对给定的 penalty_text 中的 token 增加一个 logits_bias, 可以通过设置该值屏蔽某些 token 生成的概率。 + mask_type(str): 设置该值可以控制模型生成粒度。可选参数为 word, sentence, paragraph。 + + Return: + text(str): 生成的文本 + ''' + url = 
'https://wenxin.baidu.com/moduleApi/portal/api/rest/1.0/ernie/3.0.28/zeus?from=paddlehub' + access_token = self.access_token + headers = { + 'Content-Type': 'application/x-www-form-urlencoded' + } + datas = { + 'access_token': access_token, + 'text': text, + 'min_dec_len': min_dec_len, + 'seq_len': seq_len, + 'topp': topp, + 'penalty_score': penalty_score, + 'stop_token': stop_token, + 'task_prompt': task_prompt, + 'penalty_text': penalty_text, + 'choice_text': choice_text, + 'is_unidirectional': int(is_unidirectional), + 'min_dec_penalty_text': min_dec_penalty_text, + 'logits_bias': logits_bias, + 'mask_type': mask_type, + } + + responses = requests.post(url, datas, headers=headers) + + assert responses.status_code == 200, f"Network Error {responses.status_code}." + + results = json.loads(responses.text) + + assert results['code'] == 0, f"Error message: '{results['msg']}'." + + return results['data']['result'] + + def text_generation(self, + text: str, + min_dec_len: int = 4, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2) -> str: + ''' + 文本生成 + ''' + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='PARAGRAPH', + penalty_text='[{[gEND]', + choice_text='', + is_unidirectional=True, + min_dec_penalty_text='。?:![]', + logits_bias=-10, + mask_type='paragraph' + ) + + def text_summarization(self, + text: str, + min_dec_len: int = 4, + seq_len: int = 512, + topp: float = 0.0, + penalty_score: float = 1.0) -> str: + ''' + 摘要生成 + ''' + text = "文章:{} 摘要:".format(text) + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='Summarization', + penalty_text='', + choice_text='', + is_unidirectional=False, + min_dec_penalty_text='', + logits_bias=-10000, + mask_type='word' + ) + + def copywriting_generation(self, + text: str, + min_dec_len: int = 32, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2) -> str: + ''' + 文案生成 + ''' + text = "标题:{} 文案:".format(text) + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='adtext', + penalty_text='', + choice_text='', + is_unidirectional=False, + min_dec_penalty_text='', + logits_bias=-10000, + mask_type='word' + ) + + def novel_continuation(self, + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2) -> str: + ''' + 小说续写 + ''' + text = "上文:{} 下文:".format(text) + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='gPARAGRAPH', + penalty_text='', + choice_text='', + is_unidirectional=True, + min_dec_penalty_text='。?:![]', + logits_bias=-5, + mask_type='paragraph' + ) + + def answer_generation(self, + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2) -> str: + ''' + 自由问答 + ''' + text = "问题:{} 回答:".format(text) + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='qa', + penalty_text='[gEND]', + choice_text='', + is_unidirectional=True, + min_dec_penalty_text='。?:![]', + logits_bias=-5, + mask_type='paragraph' + ) + + def couplet_continuation(self, + text: str, + min_dec_len: int = 2, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.0) -> str: + ''' + 对联续写 + ''' + text = "上联:{} 下联:".format(text) + return self.custom_generation( + text, + 
min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='couplet', + penalty_text='', + choice_text='', + is_unidirectional=False, + min_dec_penalty_text='', + logits_bias=-10000, + mask_type='word' + ) + + def composition_generation(self, + text: str, + min_dec_len: int = 128, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.2) -> str: + ''' + 作文创作 + ''' + text = "作文题目:{} 正文:".format(text) + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='zuowen', + penalty_text='', + choice_text='', + is_unidirectional=False, + min_dec_penalty_text='', + logits_bias=-10000, + mask_type='word' + ) + + def text_cloze(self, + text: str, + min_dec_len: int = 1, + seq_len: int = 512, + topp: float = 0.9, + penalty_score: float = 1.0) -> str: + ''' + 完形填空 + ''' + return self.custom_generation( + text, + min_dec_len, + seq_len, + topp, + penalty_score, + stop_token='', + task_prompt='cloze', + penalty_text='', + choice_text='', + is_unidirectional=False, + min_dec_penalty_text='', + logits_bias=-10000, + mask_type='word' + ) + + @runnable + def cmd(self, argvs): + parser = argparse.ArgumentParser( + description="Run the {}".format(self.name), + prog="hub run {}".format(self.name), + usage='%(prog)s', + add_help=True) + + parser.add_argument('--text', type=str, required=True) + parser.add_argument('--min_dec_len', type=int, default=1) + parser.add_argument('--seq_len', type=int, default=128) + parser.add_argument('--topp', type=float, default=1.0) + parser.add_argument('--penalty_score', type=float, default=1.0) + parser.add_argument('--stop_token', type=str, default='') + parser.add_argument('--task_prompt', type=str, default='') + parser.add_argument('--penalty_text', type=str, default='') + parser.add_argument('--choice_text', type=str, default='') + parser.add_argument('--is_unidirectional', type=bool, default=False) + parser.add_argument('--min_dec_penalty_text', type=str, default='') + parser.add_argument('--logits_bias', type=int, default=-10000) + parser.add_argument('--mask_type', type=str, default='word') + parser.add_argument('--ak', type=str, default='') + parser.add_argument('--sk', type=str, default='') + parser.add_argument('--task', type=str, default='custom_generation') + + args = parser.parse_args(argvs) + + func = getattr(self, args.task) + + if (args.ak != '') and (args.sk != ''): + self.access_token = get_access_token(args.ak, args.sk) + + kwargs = vars(args) + if kwargs['task'] not in ['custom_generation']: + kwargs.pop('stop_token') + kwargs.pop('task_prompt') + kwargs.pop('penalty_text') + kwargs.pop('choice_text') + kwargs.pop('is_unidirectional') + kwargs.pop('min_dec_penalty_text') + kwargs.pop('logits_bias') + kwargs.pop('mask_type') + default_kwargs = { + 'min_dec_len': 1, + 'seq_len': 128, + 'topp': 1.0, + 'penalty_score': 1.0 + } + else: + default_kwargs = { + 'min_dec_len': 1, + 'seq_len': 128, + 'topp': 1.0, + 'penalty_score': 1.0, + 'stop_token': '', + 'task_prompt': '', + 'penalty_text': '', + 'choice_text': '', + 'is_unidirectional': False, + 'min_dec_penalty_text': '', + 'logits_bias': -10000, + 'mask_type': 'word' + } + kwargs.pop('task') + kwargs.pop('ak') + kwargs.pop('sk') + + for k in default_kwargs.keys(): + if kwargs[k] == default_kwargs[k]: + kwargs.pop(k) + + return func(**kwargs) + + +if __name__ == '__main__': + ernie_zeus = ERNIEZeus() + + result = ernie_zeus.custom_generation( + '你好,' + ) + print(result) + + result = ernie_zeus.text_generation( + 
'给宠物猫起一些可爱的名字。名字:' + ) + print(result) + + result = ernie_zeus.text_summarization( + '在芬兰、瑞典提交“入约”申请近一个月来,北约成员国内部尚未对此达成一致意见。与此同时,俄罗斯方面也多次对北约“第六轮扩张”发出警告。据北约官网显示,北约秘书长斯托尔滕贝格将于本月12日至13日出访瑞典和芬兰,并将分别与两国领导人进行会晤。' + ) + print(result) + + result = ernie_zeus.copywriting_generation( + '芍药香氛的沐浴乳' + ) + print(result) + + result = ernie_zeus.novel_continuation( + '昆仑山可以说是天下龙脉的根源,所有的山脉都可以看作是昆仑的分支。这些分出来的枝枝杈杈,都可以看作是一条条独立的龙脉。' + ) + print(result) + + result = ernie_zeus.answer_generation( + '交朋友的原则是什么?' + ) + print(result) + + result = ernie_zeus.couplet_continuation( + '五湖四海皆春色' + ) + print(result) + + result = ernie_zeus.composition_generation( + '诚以养德,信以修身' + ) + print(result) + + result = ernie_zeus.text_cloze( + '她有着一双[MASK]的眼眸。' + ) + print(result) diff --git a/modules/text/text_generation/ernie_zeus/requirements.txt b/modules/text/text_generation/ernie_zeus/requirements.txt new file mode 100644 index 000000000..f2293605c --- /dev/null +++ b/modules/text/text_generation/ernie_zeus/requirements.txt @@ -0,0 +1 @@ +requests From 3dd26f86a850b704f20ffffff95ac5ac01c7d0af Mon Sep 17 00:00:00 2001 From: chenjian <1435317881@qq.com> Date: Fri, 19 Aug 2022 19:31:57 +0800 Subject: [PATCH 035/117] Add release note v2.3.0 --- README.md | 20 +++++++++++++++----- README_ch.md | 24 ++++++++++++++++++------ docs/docs_ch/release.md | 10 ++++++++++ docs/docs_en/release.md | 9 +++++++++ 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 0fbb0bb87..fba466927 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ English | [简体中文](README_ch.md) ## Introduction and Features - **PaddleHub** aims to provide developers with rich, high-quality, and directly usable pre-trained models. -- **Abundant Pre-trained Models**: 360+ pre-trained models cover the 5 major categories, including Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. +- **Abundant Pre-trained Models**: 360+ pre-trained models cover the 6 major categories, including Wenxin large models, Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. - **No Need for Deep Learning Background**: you can use AI models quickly and enjoy the dividends of the artificial intelligence era. - **Quick Model Prediction**: model prediction can be realized through a few lines of scripts to quickly experience the model effect. - **Model As Service**: one-line command to build deep learning model API service deployment capabilities. @@ -37,22 +37,32 @@ English | [简体中文](README_ch.md) - **Cross-platform**: support Linux, Windows, MacOS and other operating systems. ### Recent updates +- **🔥2022.08.19:** The v2.3.0 version is released, supports Wenxin large models and five text-to-image models based on disco diffusion(dd). + - Support [Wenxin large models API](https://wenxin.baidu.com/moduleApi) for Baidu ERNIE large-scale pre-trained model, including [**ERNIE-ViLG** model](https://aistudio.baidu.com/aistudio/projectdetail/4445016), which supports text-to-image task, and [**ERNIE 3.0 Zeus**](https://aistudio.baidu.com/aistudio/projectdetail/4445054) model, which supports applications such as writing essays, summarization, couplets, question answering, writing novels and completing text. + - Add five text-to-image domain models based on disco diffusion(dd), three for [English](https://aistudio.baidu.com/aistudio/projectdetail/4444984) and two for Chinese. 
Welcome to enjoy our **ERNIE-ViL**-based Chinese text-to-image module [disco_diffusion_ernievil_base](https://aistudio.baidu.com/aistudio/projectdetail/4444998) in aistudio. - **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) -- **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. -- **2021.05.12:** Add an open-domain dialogue system, i.e., [plato-mini](https://www.paddlepaddle.org.cn/hubdetail?name=plato-mini&en_category=TextGeneration), to make it easy to build a chatbot in wechat with the help of the wechaty, [See Demo](https://github.com/KPatr1ck/paddlehub-wechaty-demo) -- **2021.04.27:** The v2.1.0 version is released. [1] Add supports for five new models, including two high-precision semantic segmentation models based on VOC dataset and three voice classification models. [2] Enforce the transfer learning capabilities for image semantic segmentation, text semantic matching and voice classification on related datasets. [3] Add the export function APIs for two kinds of model formats, i.,e, ONNX and PaddleInference. [4] Add the support for [BentoML](https://github.com/bentoml/BentoML/), which is a cloud native framework for serving deployment. Users can easily serve pre-trained models from PaddleHub by following the [Tutorial notebooks](https://github.com/PaddlePaddle/PaddleHub/blob/release/v2.1/demo/serving/bentoml/cloud-native-model-serving-with-bentoml.ipynb). Also, see this announcement and [Release note](https://github.com/bentoml/BentoML/releases/tag/v0.12.1) from BentoML. (Many thanks to @[parano](https://github.com/parano) @[cqvu](https://github.com/cqvu) @[deehrlic](https://github.com/deehrlic) for contributing this feature in PaddleHub). [5] The total number of pre-trained models reaches **【300】**. -- **2021.02.18:** The v2.0.0 version is released, making model development and debugging easier, and the finetune task is more flexible and easy to use.The ability to transfer learning for visual tasks is fully upgraded, supporting various tasks such as image classification, image coloring, and style transfer; Transformer models such as BERT, ERNIE, and RoBERTa are upgraded to dynamic graphs, supporting Fine-Tune capabilities for text classification and sequence labeling; Optimize the Serving capability, support multi-card prediction, automatic load balancing, and greatly improve performance; the new automatic data enhancement capability Auto Augment can efficiently search for data enhancement strategy combinations suitable for data sets. 61 new word vector models were added, including 51 Chinese models and 10 English models; add 4 image segmentation models, 2 depth models, 7 image generation models, and 3 text generation models, the total number of pre-trained models reaches **【274】**. +- **🔥2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). 
[2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. + - [【more】](./docs/docs_en/release.md) ## Visualization Demo [[More]](./docs/docs_en/visualization.md) [[ModelList]](./modules) + + +### **[Wenxin large models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage)** +- Include ERNIE-ViL、ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text. +
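A minimal usage sketch for the ERNIE 3.0 Zeus module added earlier in this patch series (it mirrors the module's own README; calling the Wenxin API may require applying for an API key and secret key):

```python
import paddlehub as hub

# ERNIE 3.0 Zeus: free-form text generation (essay writing shown here)
model = hub.Module(name='ernie_zeus')
result = model.composition_generation(text='诚以养德,信以修身')
print(result)
```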

+ +
+ ### **[Computer Vision (212 models)](./modules#Image)**
+ - Many thanks to CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)、[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN)、[AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)、[Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) for the pre-trained models, you can try to train your models with them. diff --git a/README_ch.md b/README_ch.md index 7d6811b67..93018de64 100644 --- a/README_ch.md +++ b/README_ch.md @@ -30,7 +30,7 @@ ## 简介与特性 - PaddleHub旨在为开发者提供丰富的、高质量的、直接可用的预训练模型 -- **【模型种类丰富】**: 涵盖CV、NLP、Audio、Video、工业应用主流五大品类的 **360+** 预训练模型,全部开源下载,离线可运行 +- **【模型种类丰富】**: 涵盖大模型、CV、NLP、Audio、Video、工业应用主流六大品类的 **360+** 预训练模型,全部开源下载,离线可运行 - **【超低使用门槛】**:无需深度学习背景、无需数据与训练过程,可快速使用AI模型 - **【一键模型快速预测】**:通过一行命令行或者极简的Python API实现模型调用,可快速体验模型效果 - **【一键模型转服务化】**:一行命令,搭建深度学习模型API服务化部署能力 @@ -38,18 +38,30 @@ - **【跨平台兼容性】**:可运行于Linux、Windows、MacOS等多种操作系统 ## 近期更新 -- **2021.12.22**,发布v2.2.0版本。【1】新增100+高质量模型,涵盖对话、语音处理、语义分割、文字识别、文本处理、图像生成等多个领域,预训练模型总量达到[**【360+】**](https://www.paddlepaddle.org.cn/hublist);【2】新增模型[检索列表](./modules/README_ch.md),包含模型名称、网络、数据集和使用场景等信息,快速定位用户所需的模型;【3】模型文档排版优化,呈现数据集、指标、模型大小等更多实用信息。 -- **2021.05.12**,新增轻量级中文对话模型[plato-mini](https://www.paddlepaddle.org.cn/hubdetail?name=plato-mini&en_category=TextGeneration),可以配合使用wechaty实现微信闲聊机器人,[参考demo](https://github.com/KPatr1ck/paddlehub-wechaty-demo) -- **2021.04.27**,发布v2.1.0版本。【1】新增基于VOC数据集的高精度语义分割模型2个,语音分类模型3个。【2】新增图像语义分割、文本语义匹配、语音分类等相关任务的Fine-Tune能力以及相关任务数据集;完善部署能力:【3】新增ONNX和PaddleInference等模型格式的导出功能。【4】新增[BentoML](https://github.com/bentoml/BentoML) 云原生服务化部署能力,可以支持统一的多框架模型管理和模型部署的工作流,[详细教程](https://github.com/PaddlePaddle/PaddleHub/blob/release/v2.1/demo/serving/bentoml/cloud-native-model-serving-with-bentoml.ipynb). 
更多内容可以参考BentoML 最新 v0.12.1 [Releasenote](https://github.com/bentoml/BentoML/releases/tag/v0.12.1).(感谢@[parano](https://github.com/parano) @[cqvu](https://github.com/cqvu) @[deehrlic](https://github.com/deehrlic))的贡献与支持。【5】预训练模型总量达到[**【300】**](https://www.paddlepaddle.org.cn/hublist)个。 -- **2021.02.18**,发布v2.0.0版本,【1】模型开发调试更简单,finetune接口更加灵活易用。视觉类任务迁移学习能力全面升级,支持[图像分类](./demo/image_classification/README.md)、[图像着色](./demo/colorization/README.md)、[风格迁移](./demo/style_transfer/README.md)等多种任务;BERT、ERNIE、RoBERTa等Transformer类模型升级至动态图,支持[文本分类](./demo/text_classification/README.md)、[序列标注](./demo/sequence_labeling/README.md)的Fine-Tune能力;【2】优化服务化部署Serving能力,支持多卡预测、自动负载均衡,性能大幅度提升;【3】新增自动数据增强能力[Auto Augment](./demo/autoaug/README.md),能高效地搜索适合数据集的数据增强策略组合。【4】新增[词向量模型](./modules/text/embedding)61个,其中包含中文模型51个,英文模型10个;新增[图像分割](./modules/thirdparty/image/semantic_segmentation)模型4个、[深度模型](./modules/thirdparty/image/depth_estimation)2个、[图像生成](./modules/thirdparty/image/Image_gan/style_transfer)模型7个、[文本生成](./modules/thirdparty/text/text_generation)模型3个。【5】预训练模型总量达到[**【274】**](https://www.paddlepaddle.org.cn/hublist) 个。 -- [More](./docs/docs_ch/release.md) +- **🔥2022.08.19:** 发布v2.3.0版本新增[文心大模型](https://wenxin.baidu.com/)和disco diffusion(dd)系列文图生成模型。 + - 支持对[文心大模型API](https://wenxin.baidu.com/moduleApi)的调用, 包括 文图生成模型**ERNIE-ViLG**([体验Demo](https://aistudio.baidu.com/aistudio/projectdetail/4445016)), 以及支持写作文、写文案、写摘要、对对联、自由问答、写小说、补全文本等多个应用的语言模型**ERNIE 3.0 Zeus**([体验Demo](https://aistudio.baidu.com/aistudio/projectdetail/4445054))。 + - 新增基于disco diffusion技术的文图生成dd系列模型5个,其中英文模型([体验Demo](https://aistudio.baidu.com/aistudio/projectdetail/4444984))3个,中文模型2个。欢迎点击链接在aistudio上进行体验基于**ERNIE-ViL**开发的中文文图生成模型disco_diffusion_ernievil_base([体验Demo](https://aistudio.baidu.com/aistudio/projectdetail/4444998))。 +- **2022.02.18:** 加入Huggingface,创建了PaddlePaddle的空间并上传了模型: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle)。 + +- **🔥2021.12.22**,发布v2.2.0版本新增[预训练模型库官网](https://www.paddlepaddle.org.cn/hublist)。 + - 新增100+高质量模型,涵盖对话、语音处理、语义分割、文字识别、文本处理、图像生成等多个领域,预训练模型总量达到【360+】; + - 新增模型[检索列表](./modules/README_ch.md),包含模型名称、网络、数据集和使用场景等信息,快速定位用户所需的模型; + - 模型文档排版优化,呈现数据集、指标、模型大小等更多实用信息。 +- [More](./docs/docs_ch/release.md) ## **精品模型效果展示[【更多】](./docs/docs_ch/visualization.md)[【模型库】](./modules/README_ch.md)** +### **[文心大模型](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage)** +- 包含大模型ERNIE-ViL、ERNIE 3.0 Zeus, 支持文图生成、写作文、写文案、写摘要、对对联、自由问答、写小说、补全文本等多个应用。 +
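For the disco diffusion (dd) series mentioned in the release note above, the snippet below is a minimal local-usage sketch assembled from the `disco_diffusion_ernievil_base` README fragments later in this patch series; the call returns a DocumentArray, and any parameters not shown here (batch count, output directory, and so on) are assumed to keep their defaults.

```python
import paddlehub as hub

# Load the Chinese text-to-image module built on ERNIE-ViL.
module = hub.Module(name="disco_diffusion_ernievil_base")

# generate_image returns a DocumentArray; each Document also keeps the
# intermediate images of the diffusion process in its chunks.
da = module.generate_image(text_prompts="孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作")

# Save the final image and an animation of the whole generation process,
# following the helper calls shown in the module README.
da[0].save_uri_to_file("disco_diffusion_ernievil_base_out-result.png")
da[0].chunks.save_gif("disco_diffusion_ernievil_base_out-result.gif")
```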
+ +
+ + ### **[图像类(212个)](./modules/README_ch.md#图像)** - 包括图像分类、人脸检测、口罩检测、车辆检测、人脸/人体/手部关键点检测、人像分割、80+语言文本识别、图像超分/上色/动漫化等
diff --git a/docs/docs_ch/release.md b/docs/docs_ch/release.md index b7e82c9d6..8eebd7202 100755 --- a/docs/docs_ch/release.md +++ b/docs/docs_ch/release.md @@ -1,5 +1,15 @@ # 更新历史 +## `v2.3.0` + +### 【1、支持文图生成新场景】 + - 新增基于disco diffusion技术的文图生成dd系列模型5个,其中英文模型3个,中文模型2个,其中中文文图生成模型[disco_diffusion_ernievil_base](https://aistudio.baidu.com/aistudio/projectdetail/4444998)基于百度自研多模态模型**ERNIE-ViL**开发,欢迎体验。 + +### 【2、支持文心大模型API调用】 + - 新增对文心大模型[**ERNIE-ViLG**](https://aistudio.baidu.com/aistudio/projectdetail/4445016)的API调用,支持文图生成任务。 + - 新增对文心大模型[**ERNIE 3.0 Zeus**](https://aistudio.baidu.com/aistudio/projectdetail/4445054)的API调用,支持写作文、写文案、写摘要、对对联、自由问答、写小说、补全文本等多个应用。 + + ## `v2.1.0` ### 【1、版本迭代】 diff --git a/docs/docs_en/release.md b/docs/docs_en/release.md index 407d95ca8..781ecdf65 100755 --- a/docs/docs_en/release.md +++ b/docs/docs_en/release.md @@ -1,5 +1,14 @@ # Release Note +## `v2.3.0` + +### [1、Support text-to-image domain model] + - Add five text-to-image domain models based on disco diffusion, in which three models are for English and two for Chinese. Especially, Chinese text-to-image model [disco_diffusion_ernievil_base](https://aistudio.baidu.com/aistudio/projectdetail/4444998) is based on Baidu **ERNIE-ViL**,welcome to experience. + +### 【2、Support Wenxin large models API】 + - Add api call for [**ERNIE-ViLG**](https://aistudio.baidu.com/aistudio/projectdetail/4445016) model, which supports text-to-image task。 + - Add api call for [**ERNIE 3.0 Zeus**](https://aistudio.baidu.com/aistudio/projectdetail/4445054) model, which supports applications such as writing essays, summarization, couplets, question answering, writing novels and completing text. + ## `v2.1.0` ### [ 1. Improvements] From 31a5c90a3c5be2f57028c83a32cd14eab014b492 Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 30 Aug 2022 17:02:35 +0800 Subject: [PATCH 036/117] Add hub serving for text to image models --- .../disco_diffusion_clip_rn101/README.md | 44 ++++++++++++++++++- .../disco_diffusion_clip_rn101/module.py | 5 +-- .../disco_diffusion_clip_rn50/README.md | 43 +++++++++++++++++- .../disco_diffusion_clip_rn50/module.py | 5 +-- .../disco_diffusion_clip_vitb32/README.md | 44 ++++++++++++++++++- .../disco_diffusion_clip_vitb32/module.py | 11 ++--- .../disco_diffusion_cnclip_vitb16/README.md | 44 ++++++++++++++++++- .../disco_diffusion_cnclip_vitb16/module.py | 5 +-- .../disco_diffusion_ernievil_base/README.md | 44 ++++++++++++++++++- .../disco_diffusion_ernievil_base/module.py | 5 +-- .../image/text_to_image/ernie_vilg/README.md | 41 ++++++++++++++++- .../image/text_to_image/ernie_vilg/module.py | 15 +++++++ 12 files changed, 272 insertions(+), 34 deletions(-) mode change 100644 => 100755 modules/image/text_to_image/disco_diffusion_clip_rn101/README.md mode change 100644 => 100755 modules/image/text_to_image/disco_diffusion_clip_rn50/README.md mode change 100644 => 100755 modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md mode change 100644 => 100755 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md mode change 100644 => 100755 modules/image/text_to_image/ernie_vilg/README.md mode change 100644 => 100755 modules/image/text_to_image/ernie_vilg/module.py diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md old mode 100644 new mode 100755 index f6b446bc6..e194f4463 --- a/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md +++ 
b/modules/image/text_to_image/disco_diffusion_clip_rn101/README.md @@ -73,7 +73,7 @@ disco_diffusion_clip_rn101 是一个文图生成模型,可以通过输入一 # 展示所有的中间结果 da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) # 将整个生成过程保存为一个动态图gif - da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif') ``` - ### 3、API @@ -103,7 +103,47 @@ disco_diffusion_clip_rn101 是一个文图生成模型,可以通过输入一 - **返回** - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m disco_diffusion_clip_rn101 + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_rn101" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + da = DocumentArray.from_base64(r.json()["results"]) + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_rn101_out-result.png') + # 将生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif') + + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py index c59b2f5ff..52c3094a2 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/module.py @@ -174,10 +174,7 @@ def serving_method(self, text_prompts, **kwargs): """ Run as a service. 
""" - results = [] - for text_prompt in text_prompts: - result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() - results.append(result) + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() return results @runnable diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md old mode 100644 new mode 100755 index a3a6733de..e811a80ae --- a/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/README.md @@ -73,7 +73,7 @@ disco_diffusion_clip_rn50 是一个文图生成模型,可以通过输入一段 # 展示所有的中间结果 da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) # 将整个生成过程保存为一个动态图gif - da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif') ``` - ### 3、API @@ -103,7 +103,46 @@ disco_diffusion_clip_rn50 是一个文图生成模型,可以通过输入一段 - **返回** - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m disco_diffusion_clip_rn50 + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_rn50" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + da = DocumentArray.from_base64(r.json()["results"]) + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_rn50_out-result.png') + # 将生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif') + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py index 4b681525b..f326066f0 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/module.py @@ -174,10 +174,7 @@ def serving_method(self, text_prompts, **kwargs): """ Run as a service. 
""" - results = [] - for text_prompt in text_prompts: - result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() - results.append(result) + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() return results @runnable diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md old mode 100644 new mode 100755 index 1a42914c7..6327761d3 --- a/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README.md @@ -73,7 +73,7 @@ disco_diffusion_clip_vitb32 是一个文图生成模型,可以通过输入一 # 展示所有的中间结果 da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) # 将整个生成过程保存为一个动态图gif - da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif') ``` - ### 3、API @@ -103,7 +103,47 @@ disco_diffusion_clip_vitb32 是一个文图生成模型,可以通过输入一 - **返回** - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m disco_diffusion_clip_vitb32 + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_vitb32" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + da = DocumentArray.from_base64(r.json()["results"]) + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_clip_vitb32_out-result.png') + # 将生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif') + + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py index fb025bfc9..ca560c356 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/module.py @@ -19,12 +19,12 @@ from typing import List from typing import Optional -import disco_diffusion_clip_vitb32.clip as clip -import disco_diffusion_clip_vitb32.resize_right as resize_right import paddle -from disco_diffusion_clip_vitb32.reverse_diffusion import create +import disco_diffusion_clip_vitb32.clip as clip +import disco_diffusion_clip_vitb32.resize_right as resize_right import paddlehub as hub +from disco_diffusion_clip_vitb32.reverse_diffusion import create from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -174,10 +174,7 @@ def serving_method(self, text_prompts, **kwargs): """ Run as a service. 
""" - results = [] - for text_prompt in text_prompts: - result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() - results.append(result) + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() return results @runnable diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md old mode 100644 new mode 100755 index 135600437..b41b7f8fb --- a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README.md @@ -73,7 +73,7 @@ disco_diffusion_cnclip_vitb16 是一个文图生成模型,可以通过输入 # 展示所有的中间结果 da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) # 将整个生成过程保存为一个动态图gif - da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif') ``` - ### 3、API @@ -103,7 +103,47 @@ disco_diffusion_cnclip_vitb16 是一个文图生成模型,可以通过输入 - **返回** - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m disco_diffusion_cnclip_vitb16 + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': '孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_cnclip_vitb16" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + da = DocumentArray.from_base64(r.json()["results"]) + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_cnclip_vitb16_out-result.png') + # 将生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif') + + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py index 806135c16..8b4cd3421 100755 --- a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/module.py @@ -173,10 +173,7 @@ def serving_method(self, text_prompts, **kwargs): """ Run as a service. 
""" - results = [] - for text_prompt in text_prompts: - result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() - results.append(result) + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() return results @runnable diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md b/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md index 182bb6ec2..eb857deb2 100755 --- a/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/README.md @@ -74,7 +74,7 @@ disco_diffusion_ernievil_base 是一个文图生成模型,可以通过输入 # 展示所有的中间结果 da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) # 将整个生成过程保存为一个动态图gif - da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif', show_index=True, inline_display=True, size_ratio=0.5) + da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif') ``` - ### 3、API @@ -104,7 +104,47 @@ disco_diffusion_ernievil_base 是一个文图生成模型,可以通过输入 - **返回** - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m disco_diffusion_ernievil_base + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': '孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_ernievil_base" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + da = DocumentArray.from_base64(r.json()["results"]) + # 手动将最终生成的图像保存到指定路径 + da[0].save_uri_to_file('disco_diffusion_ernievil_base_out-result.png') + # 将生成过程保存为一个动态图gif + da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif') + + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py index a4159ee0f..c87a14f78 100755 --- a/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/module.py @@ -175,10 +175,7 @@ def serving_method(self, text_prompts, **kwargs): """ Run as a service. 
""" - results = [] - for text_prompt in text_prompts: - result = self.generate_image(text_prompts=text_prompt, **kwargs)[0].to_base64() - results.append(result) + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() return results @runnable diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md old mode 100644 new mode 100755 index c85e52d2b..82427c1d8 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -93,7 +93,46 @@ - **返回** - images(List(PIL.Image)): 返回生成的所有图像列表,PIL的Image格式。 -## 四、更新历史 +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m ernie_vilg + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果。 + + - ```python + import requests + import json + import cv2 + import base64 + from io import BytesIO + from PIL import Image + + # 发送HTTP请求 + data = {'text_prompts': '巨大的白色城堡'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ernie_vilg" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + for i, result in enumerate(r.json()["results"]): + image = Image.open(BytesIO(base64.b64decode(result))) + image.save('result_{}.png'.format(i)) + + +## 五、更新历史 * 1.0.0 diff --git a/modules/image/text_to_image/ernie_vilg/module.py b/modules/image/text_to_image/ernie_vilg/module.py old mode 100644 new mode 100755 index 7af5abb0c..dad3c9833 --- a/modules/image/text_to_image/ernie_vilg/module.py +++ b/modules/image/text_to_image/ernie_vilg/module.py @@ -1,5 +1,6 @@ import argparse import ast +import base64 import os import re import sys @@ -214,6 +215,20 @@ def run_cmd(self, argvs): output_dir=args.output_dir) return results + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results_base64encoded = [] + results = self.generate_image(text_prompts=text_prompts, **kwargs) + for result in results: + buffered = BytesIO() + result.save(buffered, format="png") + img_str = base64.b64encode(buffered.getvalue()).decode('utf-8') + results_base64encoded.append(img_str) + return results_base64encoded + def add_module_input_arg(self): """ Add the command input options. From bccc7e24f34305773bbabf742a809eb2dad4697e Mon Sep 17 00:00:00 2001 From: chenjian Date: Tue, 30 Aug 2022 18:22:27 +0800 Subject: [PATCH 037/117] optimize ernie_vilg module doc (#1968) * optimize doc * update * fix * add logo * add logo * update new style * fix Co-authored-by: wuzewu --- .../image/text_to_image/ernie_vilg/README.md | 678 +++++++++++++++++- .../image/text_to_image/ernie_vilg/module.py | 11 +- 2 files changed, 669 insertions(+), 20 deletions(-) diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md index 82427c1d8..74bd920e5 100755 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -1,4 +1,7 @@ -# ernie_vilg +

+ + +# PaddleHub ERNIE-ViLG |模型名称|ernie_vilg| | :--- | :---: | @@ -14,11 +17,11 @@ ### 应用效果展示 - - 输入文本 "宁静的小镇" 风格 "油画" + - 输入文本 "戴眼镜的猫" 风格 "油画" - 输出图像

- +
@@ -48,7 +51,7 @@ - ### 1、命令行预测 - ```shell - $ hub run ernie_vilg --text_prompts "宁静的小镇" --output_dir ernie_vilg_out + $ hub run ernie_vilg --text_prompts "宁静的小镇" --style "油画" --output_dir ernie_vilg_out ``` - ### 2、预测代码示例 @@ -58,25 +61,16 @@ module = hub.Module(name="ernie_vilg") text_prompts = ["宁静的小镇"] - images = module.generate_image(text_prompts=text_prompts, output_dir='./ernie_vilg_out/') + images = module.generate_image(text_prompts=text_prompts, style='油画', output_dir='./ernie_vilg_out/') ``` - ### 3、API - - ```python - def __init__(ak: Optional[str]=None, sk: Optional[str]=None) - ``` - - 初始化模块,可自定义用于申请访问文心API的ak和sk。 - - - **参数** - - ak:(Optional[str]): 用于申请文心api使用token的ak,可不填。 - - sk:(Optional[str]): 用于申请文心api使用token的sk,可不填。 - - ```python def generate_image( text_prompts:str, style: Optional[str] = "油画", - topk: Optional[int] = 10, + topk: Optional[int] = 6, output_dir: Optional[str] = 'ernievilg_output') ``` @@ -85,8 +79,8 @@ - **参数** - text_prompts(str): 输入的语句,描述想要生成的图像的内容。 - - style(Optional[str]): 生成图像的风格,当前支持'油画','水彩','粉笔画','卡通','儿童画','蜡笔画'。 - - topk(Optional[int]): 保存前多少张图,最多保存10张。 + - style(Optional[str]): 生成图像的风格,当前支持'油画','水彩','粉笔画','卡通','儿童画','蜡笔画','探索无限'。 + - topk(Optional[int]): 保存前多少张图,最多保存6张。 - output_dir(Optional[str]): 保存输出图像的目录,默认为"ernievilg_output"。 @@ -141,3 +135,653 @@ ```shell $ hub install ernie_vilg == 1.0.0 ``` + + + + +## 六、 Prompt 指南 + + + +这是一份如何调整 Prompt 得到更漂亮的图片的经验性文档。我们的结果和经验都来源于[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 和[社区的资料](#related-work)。 + +什么是 Prompt?Prompt 是输入到 Demo 中的文字,可以是一个实体,例如猫;也可以是一串富含想象力的文字,例如:『夕阳日落时,天边有巨大的云朵,海面波涛汹涌,风景,胶片感』。不同的 Prompt 对于生成的图像质量影响非常大。所以也就有了下面所有的 Prompt 的一些经验性技巧。 + +| ![174_蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术_000-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/174_蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术_000-1.jpg) | +| :----------------------------------------------------------: | +| 蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术 | + + + + +## 前言 + +Prompt 的重要性如此重要,以至于我们需要构造一个示例来进行一次说明。 + +如下图,[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 中,『卡通』模式下,输入的 Prompt 为『橘猫』,以及 『卡通』模型式下『极乐迪斯科里的猫, 故障艺术』两个示例,能够看出来后者的细节更多,呈现的图片也更加的风格化。 + +开放风格限制(本质上就是在 Prompt 中不加入风格控制词),即下图图3,得到的图片细节更多、也更加真实,同时还保留了比较强烈的风格元素。所以后面的所有内容,都将围绕着如何构造更好的 Prompt 进行资料的整理。 + +| ![橘猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫0.jpg) | ![极乐迪斯科里的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫1.jpg) | ![极乐迪斯科里的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫3.jpg) | +| :----------------------------------------------------------: | :----------------------------------------------------------: | ------------------------------------------------------------ | +| “橘猫”(卡通) | “极乐迪斯科里的猫, 故障艺术”(卡通) | “极乐迪斯科里的猫, 故障艺术” (探索无限) | + +| ![cat-hd](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/cat-hd.jpg) | +| :----------------------------: | +| 极乐迪斯科里的猫,故障艺术 | + + + +## 呼吁与准则 + +机器生成图片的最终目的还是便捷地为人类创造美的作品。而技术不是十全十美的,不能保证每次生成的图像都能够尽善尽美。因此呼吁所有相关玩家,如果想分享作品,那就分享那些美感爆棚的作品! 
+ +算法生成的图片难免会受到数据的影响,从而导致生成的图片是有数据偏见的。因此在分享机器生成图片到社交媒体之前,请三思当前的图片是不是含有:令人不适的、暴力的、色情的内容。如果有以上的内容请自行承担法律后果。 + + + +## Prompt 的设计 + +如何设计 Prompt,下文大概会通过4个方面来说明:[Prompt 公式](#p-eq),[Prompt 原则](#p-principle),[Prompt 主体](#p-entity)、[Prompt 修饰词](#p-modifier)。 + +需要注意的是,这里的 Prompt 公式仅仅是个入门级别的参考,是经验的简单总结,在熟悉了 Prompt 的原理之后,可以尽情的发挥脑洞修改 Prompt。 + + + + + + +## Prompt 公式 + +$$ +Prompt = [形容词] [主语] ,[细节设定], [修饰语或者艺术家] +$$ + +按照这个公式,我们首先构造一个形容词加主语的案例。 这里我构造的是 戴着眼镜的猫, 风格我选择的是油画风格,然后我再添加一些细节设定,这里我给的是 漂浮在宇宙中, 可以看到 ,猫猫的后面出现了很多天体。 + +| ![猫1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫1.jpg) | ![猫2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫2.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫3.jpg) | +| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | +| “戴着眼镜的猫”(油画) | “戴着眼镜的猫,漂浮在宇宙中”(油画) | “戴着眼镜的猫,漂浮在宇宙中,高更风格”(油画) | + +最后我们想让我们的照片风格更加有艺术性的效果, 我们选择的艺术家是高更, 可以看到图像的画风有了更强的艺术风格。 + + + + +## Prompt 设计原则 + +### Prompt 简单原则: 清楚地陈述 + +除了公式之外,也有一些简单的 Prompt设计原则分享给大家:即**清楚的陈述**。 + +例如我们如果是简单的输入风景的话,往往模型不知道我们想要的风景是什么样子的(下图1)。我们要去尽量的幻想风景的样子,然后变成语言描述。 例如我想像的是日落时,海边的风景, 那我就构造了 Prompt 『夕阳日落时,阳光落在云层上,海面波光粼粼,风景』(下图2)。 进一步的,我想风格化我的图像,所以我在结尾的部分,增加了『胶片感』来让图片的色彩更加好看一些(下图3)。但是云彩的细节丢失了一些,进一步的我再增加天边巨大云朵这一个细节,让我的图片朝着我想要的样子靠的更进一步(下图4)。 + +| ![猫1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景1.jpg) | ![猫2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景2.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景3.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景4.jpg) | +| :------------------------: | :----------------------------------------------: | :------------------------------------------------------: | -------------------------------------------------------- | +| “风景” | “夕阳日落时,阳光落在云层上,海面波光粼粼,风景” | “夕阳日落时,阳光落在云层上,海面波涛汹涌,风景,胶片感” | 夕阳日落时,天边有巨大的云朵,海面波涛汹涌,风景,胶片感 | + + + + +## Prompt 主体的选择 + +Prompt 的主体可以是千奇百怪、各种各样的。这里我挑了几个简单的容易出效果的主体示例和一些能够营造特殊氛围的氛围词来激发大家的灵感。 + + + +| ![宇航员](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/宇航员.jpg) | ![孤岛](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/孤岛.jpg) | ![白色城堡](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/白色城堡.jpg) | ![机器人](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/机器人.jpg) | +| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | +| 宇航员 | 孤岛 | 白色城堡 | 机器人 | +| ![巫师](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/巫师.jpg) | ![罗马城](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/罗马城.jpg) | ![海鸥](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/海鸥.jpg) | ![气球](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/气球.jpg) | +| 巫师 | 罗马城 | 海鸥 | 气球 | + + + + + +| ![霓虹灯](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/霓虹灯.jpg) | 
![烟](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/烟.jpg) | ![漩涡](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/漩涡.jpg) | +| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | +| …日落,霓虹灯…薄雾 | …烟… | …燃烧漩涡, …烟雾和碎片 | +| ![废墟](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/废墟.jpg) | ![光之](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/光之.jpg) | ![巨大的](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/巨大的.jpg) | +| …废墟… | 光之… | 巨大的… | + + + + +## Prompt 修饰词 + +如果想让生成的图片更加的艺术化、风格话,可以考虑在 Prompt 中添加艺术修饰词。艺术修饰词可以是一些美术风格(例如表现主义、抽象主义等),也可以是一些美学词汇(蒸汽波艺术、故障艺术等),也可以是一些摄影术语(80mm摄像头、浅景深等),也可以是一些绘图软件(虚幻引擎、C4D等)。 + +按照这样的规律,我们在两个输入基准上 : + +> 一只猫坐在椅子上,戴着一副墨镜 +> +> 日落时的城市天际线 +> + +通过构造『输入 + Prompt 修饰词』来展示不同修饰词的效果 (这里的策略参考了[资料](https://docs.google.com/document/d/11WlzjBT0xRpQhP9tFMtxzd0q6ANIdHPUBkMV-YB043U/edit))。 + +需要注意的是,不是所有的 Prompt 对于所有的修饰词都会发生反应。所以查阅 Prompt 修饰词的过程中,会发现部分的 Prompt 修饰词只能对两个基准中的一个生效。这是很正常的,因为 Prompt 的调优是一个反复的试错的过程。接下来,大家结合如下的 Prompt 修饰词, Happy Prompting 吧! + + + +### 复古未来主义风格 + +| ![00472_000_一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00472_000_一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格.jpg) | ![00472_000_日落时的城市天际线,复古未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00472_000_日落时的城市天际线,复古未来主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格 | 日落时的城市天际线,复古未来主义风格 | + + + +### 粉彩朋克风格 + +| ![00017_004_一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_004_一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格.jpg) | ![00029_001_日落时的城市天际线,粉彩朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_001_日落时的城市天际线,粉彩朋克风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格 | 日落时的城市天际线,粉彩朋克风格 | + +### 史前遗迹风格 + +| ![00443_005_一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00443_005_一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格.jpg) | ![00443_005_日落时的城市天际线,史前遗迹风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00443_005_日落时的城市天际线,史前遗迹风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格 | 日落时的城市天际线,史前遗迹风格 | + + + + +### 波普艺术风格 + +| ![00434_005_一只猫坐在椅子上,戴着一副墨镜,波普艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00434_005_一只猫坐在椅子上,戴着一副墨镜,波普艺术风格.jpg) | ![00434_002_日落时的城市天际线,波普艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00434_002_日落时的城市天际线,波普艺术风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,波普艺术风格 | 日落时的城市天际线,后世界末日风格 | + + + +### 迷幻风格 + +| 
![00451_000_一只猫坐在椅子上,戴着一副墨镜,迷幻药风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00451_000_一只猫坐在椅子上,戴着一副墨镜,迷幻药风格.jpg) | ![00451_001_日落时的城市天际线,迷幻药风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00451_001_日落时的城市天际线,迷幻药风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,迷幻风格 | 日落时的城市天际线,迷幻风格 | + + +### 赛博朋克风格 + +| ![00142_003_一只猫坐在椅子上,戴着一副墨镜,赛博朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00142_003_一只猫坐在椅子上,戴着一副墨镜,赛博朋克风格.jpg) | ![00142_000_日落时的城市天际线,赛博朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00142_000_日落时的城市天际线,赛博朋克风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,赛博朋克风格 | 日落时的城市天际线,赛博朋克风格 | + + +### 纸箱风格 + + +| ![00081_000_一只猫坐在椅子上,戴着一副墨镜,纸箱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00081_000_一只猫坐在椅子上,戴着一副墨镜,纸箱风格.jpg) | ![00081_000_日落时的城市天际线,纸箱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00081_000_日落时的城市天际线,纸箱风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,纸箱风格 | 日落时的城市天际线,纸箱风格 | + +### 未来主义风格 + +| ![00083_000_一只猫坐在椅子上,戴着一副墨镜,未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00083_000_一只猫坐在椅子上,戴着一副墨镜,未来主义风格.jpg) | ![00083_002_日落时的城市天际线,未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00083_002_日落时的城市天际线,未来主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,未来主义风格 | 一只猫坐在椅子上,戴着一副墨镜,未来主义风格 | + + + +### 抽象技术风格 + +| ![00000_003_一只猫坐在椅子上,戴着一副墨镜, 抽象技术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00000_003_一只猫坐在椅子上,戴着一副墨镜,抽象技术风格.jpg) | ![00000_004_日落时的城市天际线,抽象技术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00000_004_日落时的城市天际线,抽象技术风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,抽象技术风格 | 日落时的城市天际线,抽象技术风格 | + + + + +### 海滩兔风格 + + +| ![00049_001_一只猫坐在椅子上,戴着一副墨镜,海滩兔风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00049_001_一只猫坐在椅子上,戴着一副墨镜,海滩兔风格.jpg) | ![00049_003_日落时的城市天际线,海滩兔风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00049_003_日落时的城市天际线,海滩兔风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,海滩兔风格 | 日落时的城市天际线,海滩兔风格 | + + +### 粉红公主风格 + +| ![00038_004_一只猫坐在椅子上,戴着一副墨镜,粉红公主风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00038_004_一只猫坐在椅子上,戴着一副墨镜,粉红公主风格.jpg) | ![00046_004_日落时的城市天际线,粉红公主风格-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_004_日落时的城市天际线,粉红公主风格-1.jpg) | +| 
------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,粉红公主风格 | 日落时的城市天际线,粉红公主风格 | + + +### 嬉皮士风格 + +| ![00275_002_一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00275_002_一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格.jpg) | ![00275_001_日落时的城市天际线,嬉皮士风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00275_001_日落时的城市天际线,嬉皮士风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格 | 日落时的城市天际线,嬉皮士风格 | + +### 幻象之城风格 + +| ![00288_000_一只猫坐在椅子上,戴着一副墨镜,幻象之城风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00288_000_一只猫坐在椅子上,戴着一副墨镜,幻象之城风格.jpg) | ![00288_004_日落时的城市天际线,幻象之城风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00288_004_日落时的城市天际线,幻象之城风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,幻象之城风格 | 日落时的城市天际线,幻象之城风格 | + + +### 美人鱼风格 + +| ![00351_002_一只猫坐在椅子上,戴着一副墨镜,美人鱼风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00351_002_一只猫坐在椅子上,戴着一副墨镜,美人鱼风格.jpg) | ![00351_000_日落时的城市天际线,美人鱼风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00351_000_日落时的城市天际线,美人鱼风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,美人鱼风格 | 日落时的城市天际线,美人鱼风格 | + + +### 迷宫物语风格 + + +| ![00382_005_一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00382_005_一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格.jpg) | ![00382_000_日落时的城市天际线,迷宫物语风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00382_000_日落时的城市天际线,迷宫物语风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格 | 日落时的城市天际线,迷宫物语风格 | + +### 仙女风格 + + +| ![00397_003_一只猫坐在椅子上,戴着一副墨镜,仙女风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00397_003_一只猫坐在椅子上,戴着一副墨镜,仙女风格.jpg) | ![00397_004_日落时的城市天际线,仙女风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00397_004_日落时的城市天际线,仙女风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,仙女风格 | 日落时的城市天际线,仙女风格 | + + + + + +### Low Poly 风格 + +| ![猫low-poly风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/猫low-poly风格.jpg) | ![sky-line-low-poly](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/sky-line-low-poly.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, low poly 风格 | 日落时的城市天际线, low-poly | + + + + +### 浮世绘风格 + +| ![00564_001_一只猫坐在椅子上,戴着一副墨镜,浮世绘风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00564_001_一只猫坐在椅子上,戴着一副墨镜,浮世绘风格.jpg) | 
![00564_002_日落时的城市天际线,浮世绘风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00564_002_日落时的城市天际线,浮世绘风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,浮世绘风格 | 日落时的城市天际线,浮世绘风格 | + +### 矢量心风格 + +| ![00573_001_一只猫坐在椅子上,戴着一副墨镜,矢量心风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00573_001_一只猫坐在椅子上,戴着一副墨镜,矢量心风格.jpg) | ![00573_005_日落时的城市天际线,矢量心风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00573_005_日落时的城市天际线,矢量心风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,矢量心风格 | 日落时的城市天际线,矢量心风格 | + + +### 摩托车手风格 + + +| ![00051_000_一只猫坐在椅子上,戴着一副墨镜,摩托车手风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00051_000_一只猫坐在椅子上,戴着一副墨镜,摩托车手风格.jpg) | ![日落时的城市天际线,摩托车手风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/日落时的城市天际线,摩托车手风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,摩托车手风格 | 日落时的城市天际线,摩托车手风格 | + + + +### 孟菲斯公司风格 + + +| ![00114_001_一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00114_001_一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格.jpg) | ![00114_002_日落时的城市天际线,孟菲斯公司风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00114_002_日落时的城市天际线,孟菲斯公司风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格 | 日落时的城市天际线,孟菲斯公司风格 | + + +### 泥塑风格 + + +| ![一只猫坐在椅子上,戴着一副墨镜, 泥塑风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/一只猫坐在椅子上戴着一副墨镜泥塑风格.jpg) | ![00013_002_日落时的城市天际线, 泥塑](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00013_002_日落时的城市天际线,泥塑.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, 泥塑风格 | 日落时的城市天际线, 泥塑风格 | + + + + +### 苔藓风格 + +| ![00006_001_一只猫坐在椅子上,戴着一副墨镜,苔藓风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00006_001_一只猫坐在椅子上,戴着一副墨镜,苔藓风格.jpg) | ![00004_004_日落时的城市天际线,苔藓风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00004_004_日落时的城市天际线,苔藓风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,苔藓风格 | 日落时的城市天际线,苔藓风格 | + + + +### 新浪潮风格 + +| ![00389_000_一只猫坐在椅子上,戴着一副墨镜,新浪潮风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00389_000_一只猫坐在椅子上,戴着一副墨镜,新浪潮风格.jpg) | ![00389_005_日落时的城市天际线,新浪潮风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00389_005_日落时的城市天际线,新浪潮风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,新浪潮风格 | 日落时的城市天际线,新浪潮风格 | + +### 嘻哈风格 + +| 
![00274_000_一只猫坐在椅子上,戴着一副墨镜,嘻哈风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00274_000_一只猫坐在椅子上,戴着一副墨镜,嘻哈风格.jpg) | ![00274_005_日落时的城市天际线,嘻哈风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00274_005_日落时的城市天际线,嘻哈风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,嘻哈风格 | 日落时的城市天际线,嘻哈风格 | + +### 矢量图 + +| ![00177_001_一只猫坐在椅子上,戴着一副墨镜, 矢量图](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00177_001_一只猫坐在椅子上戴着一副墨镜矢量图.jpg) | ![00020_002_日落时的城市天际线, 矢量图](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00020_002_日落时的城市天际线矢量图.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, 矢量图 | 日落时的城市天际线, 矢量图 | + +### 铅笔艺术 + + +| ![00203_000_一只猫坐在椅子上,戴着一副墨镜, 铅笔艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00203_000_一只猫坐在椅子上戴着一副墨镜铅笔艺术.jpg) | ![00053_000_日落时的城市天际线, 铅笔艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00053_000_日落时的城市天际线铅笔艺术.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, 铅笔艺术 | 日落时的城市天际线, 铅笔艺术 | + + +### 女巫店风格 + +| ![00606_001_一只猫坐在椅子上,戴着一副墨镜,女巫店风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00606_001_一只猫坐在椅子上,戴着一副墨镜,女巫店风格.jpg) | ![00606_000_日落时的城市天际线,女巫店风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00606_000_日落时的城市天际线,女巫店风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,女巫店风格 | 日落时的城市天际线,女巫店风格 | + + + +### 4D 建模 + + +| ![00230_000_一只猫坐在椅子上,戴着一副墨镜, 4D 建模](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00230_000_一只猫坐在椅子上戴着一副墨镜4D建模.jpg) | ![00082_001_日落时的城市天际线, 4D 建模](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00082_001_日落时的城市天际线4D建模.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, 4D 建模 | 日落时的城市天际线, 4D 建模 | + + + +### 水彩墨风格 + + +| ![00280_004_一只猫坐在椅子上,戴着一副墨镜, 水彩墨风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00280_004_一只猫坐在椅子上,戴着一副墨镜,水彩墨风格.jpg) | ![00130_004_日落时的城市天际线, 水彩墨风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00130_004_日落时的城市天际线,水彩墨风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜, 水彩墨风格 | 日落时的城市天际线, 水彩墨风格 | + + + +### 酸性精灵风格 + +| ![00001_004_一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00001_004_一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格.jpg) | ![00001_004_日落时的城市天际线,酸性精灵风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00001_004_日落时的城市天际线,酸性精灵风格.jpg) | +| ------------------------------------------------------------ | 
------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格 | 日落时的城市天际线,酸性精灵风格 | + + +### 海盗风格 + +| ![00427_002_一只猫坐在椅子上,戴着一副墨镜,海盗风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00427_002_一只猫坐在椅子上,戴着一副墨镜,海盗风格.jpg) | ![00427_000_日落时的城市天际线,海盗风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00427_000_日落时的城市天际线,海盗风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 日落时的城市天际线,海盗风格 | 一只猫坐在椅子上,戴着一副墨镜,海盗风格 | + + + +### 古埃及风格 + + +| ![00017_005_一只猫坐在椅子上,戴着一副墨镜,古埃及风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_005_一只猫坐在椅子上,戴着一副墨镜,古埃及风格.jpg) | ![00017_003_日落时的城市天际线,古埃及风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_003_日落时的城市天际线,古埃及风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,古埃及风格 | 日落时的城市天际线,古埃及风格 | + +### 风帽风格 + + +| ![戴着帽子的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/戴着帽子的猫.jpg) | ![戴着帽子的城市](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/戴着帽子的城市.jpg) | +| --------------------------------------------------------- | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,风帽风格 | 日落时的城市天际线,风帽风格 | + +### 装饰艺术风格 + + +| ![00029_000_一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_000_一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格.jpg) | ![00029_005_日落时的城市天际线,装饰艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_005_日落时的城市天际线,装饰艺术风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格 | 日落时的城市天际线,装饰艺术风格 | + +### 极光风格 + + +| ![00035_004_一只猫坐在椅子上,戴着一副墨镜,极光风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00035_004_一只猫坐在椅子上,戴着一副墨镜,极光风格.jpg) | ![00035_003_日落时的城市天际线,极光风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00035_003_日落时的城市天际线,极光风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,极光风格 | 日落时的城市天际线,极光风格 | + +### 秋天风格 + + +| ![00036_005_一只猫坐在椅子上,戴着一副墨镜,秋天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00036_005_一只猫坐在椅子上,戴着一副墨镜,秋天风格.jpg) | ![00036_003_日落时的城市天际线,秋天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00036_003_日落时的城市天际线,秋天风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 日落时的城市天际线,秋天风格 | 一只猫坐在椅子上,戴着一副墨镜,秋天风格 | + +### 巴洛克风格 + + +| ![00046_002_一只猫坐在椅子上,戴着一副墨镜,巴洛克风格风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_002_一只猫坐在椅子上,戴着一副墨镜,巴洛克风格风格.jpg) | ![00046_003_日落时的城市天际线,巴洛克风格风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_003_日落时的城市天际线,巴洛克风格风格.jpg) | +| 
------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,巴洛克风格 | 日落时的城市天际线,巴洛克风格 | + +### 立体主义风格 + +| ![00128_002_一只猫坐在椅子上,戴着一副墨镜,立体主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00128_002_一只猫坐在椅子上,戴着一副墨镜,立体主义风格.jpg) | ![00128_004_日落时的城市天际线,立体主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00128_004_日落时的城市天际线,立体主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,立体主义风格 | 日落时的城市天际线,立体主义风格 | + + +### 黑暗自然主义风格 + +| ![00147_002_一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00147_002_一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格.jpg) | ![00147_004_日落时的城市天际线,黑暗自然主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00147_004_日落时的城市天际线,黑暗自然主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格 | 日落时的城市天际线,黑暗自然主义风格 | + +### 表现主义风格 + +| ![00190_001_一只猫坐在椅子上,戴着一副墨镜,表现主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00190_001_一只猫坐在椅子上,戴着一副墨镜,表现主义风格.jpg) | ![00190_000_日落时的城市天际线,表现主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00190_000_日落时的城市天际线,表现主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,表现主义风格 | 日落时的城市天际线,表现主义风格 | + +### 野兽派风格 + +| ![00200_000_一只猫坐在椅子上,戴着一副墨镜,野兽派风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00200_000_一只猫坐在椅子上,戴着一副墨镜,野兽派风格.jpg) | ![00200_002_日落时的城市天际线,野兽派风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00200_002_日落时的城市天际线,野兽派风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,野兽派风格 | 日落时的城市天际线,野兽派风格 | + +### 鬼魂风格 + +| ![00226_001_一只猫坐在椅子上,戴着一副墨镜,鬼魂风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00226_001_一只猫坐在椅子上,戴着一副墨镜,鬼魂风格.jpg) | ![00226_002_日落时的城市天际线,鬼魂风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00226_002_日落时的城市天际线,鬼魂风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,鬼魂风格 | 日落时的城市天际线,鬼魂风格 | + +### 印象主义风格 + +| ![00289_000_一只猫坐在椅子上,戴着一副墨镜,印象主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00289_000_一只猫坐在椅子上,戴着一副墨镜,印象主义风格.jpg) | ![00289_001_日落时的城市天际线,印象主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00289_001_日落时的城市天际线,印象主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,印象主义风格 | 日落时的城市天际线,印象主义风格 | + +### 卡瓦伊风格 + +| ![00305_001_一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00305_001_一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格.jpg) | 
![00305_000_日落时的城市天际线,卡瓦伊风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00305_000_日落时的城市天际线,卡瓦伊风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格 | 日落时的城市天际线,卡瓦伊风格 | + +### 极简主义风格 + +| ![00362_004_一只猫坐在椅子上,戴着一副墨镜,极简主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00362_004_一只猫坐在椅子上,戴着一副墨镜,极简主义风格.jpg) | ![00362_002_日落时的城市天际线,极简主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00362_002_日落时的城市天际线,极简主义风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,极简主义风格 | 日落时的城市天际线,极简主义风格 | + +### 水井惠郎风格 + +| ![00364_000_一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00364_000_一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格.jpg) | ![00364_000_日落时的城市天际线,水井惠郎风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00364_000_日落时的城市天际线,水井惠郎风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格 | 日落时的城市天际线,水井惠郎风格 | + +### 照片写实风格 + +| ![00423_000_一只猫坐在椅子上,戴着一副墨镜,照片写实风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00423_000_一只猫坐在椅子上,戴着一副墨镜,照片写实风格.jpg) | ![00423_002_日落时的城市天际线,照片写实风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00423_002_日落时的城市天际线,照片写实风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,照片写实风格 | 日落时的城市天际线,照片写实风格 | + + +### 像素可爱风格 + +| ![00428_005_一只猫坐在椅子上,戴着一副墨镜,像素可爱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00428_005_一只猫坐在椅子上,戴着一副墨镜,像素可爱风格.jpg) | ![00428_005_日落时的城市天际线,像素可爱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00428_005_日落时的城市天际线,像素可爱风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,像素可爱风格 | 日落时的城市天际线,像素可爱风格 | + + + +### 雨天风格 + +| ![00067_002_一只猫坐在椅子上,戴着一副墨镜,雨天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00067_002_一只猫坐在椅子上,戴着一副墨镜,雨天风格.jpg) | ![00050_003_日落时的城市天际线,雨天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00050_003_日落时的城市天际线,雨天风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 日落时的城市天际线,雨天风格 | 一只猫坐在椅子上,戴着一副墨镜,雨天风格 | + +### 湿漉漉的风格 + +| ![00523_005_一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00523_005_一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格.jpg) | ![00523_001_日落时的城市天际线,湿漉漉的风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00523_001_日落时的城市天际线,湿漉漉的风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格 | 日落时的城市天际线,湿漉漉的风格 | + + +### 维京人风格 + +| 
![00577_004_一只猫坐在椅子上,戴着一副墨镜,维京人风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00577_004_一只猫坐在椅子上,戴着一副墨镜,维京人风格.jpg) | ![00577_005_日落时的城市天际线,维京人风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00577_005_日落时的城市天际线,维京人风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,维京人风格 | 日落时的城市天际线,维京人风格 | + +### 后印象主义 + + +| ![一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义.jpg) | ![日落时的城市天际线, 风格:后印象主义-v2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/日落时的城市天际线,风格:后印象主义-v2.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义 | 日落时的城市天际线, 风格:后印象主义-v2 | + +### 素人主义 + + +| ![一只猫坐在椅子上,戴着一副墨镜,风格:素人主义](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/一只猫坐在椅子上,戴着一副墨镜,风格:素人主义.jpg) | ![日落时的城市天际线,风格:素人艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/日落时的城市天际线,风格:素人艺术.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,风格:素人主义 | 日落时的城市天际线, 风格:素人艺术 | + + + +### 碎核风格 + + +| ![00064_000_一只猫坐在椅子上,戴着一副墨镜,碎核风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00064_000_一只猫坐在椅子上,戴着一副墨镜,碎核风格.jpg) | ![00064_002_日落时的城市天际线,碎核风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00064_002_日落时的城市天际线,碎核风格.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只猫坐在椅子上,戴着一副墨镜,碎核风格 | 日落时的城市天际线,碎核风格 | + + + + + + + +## Prompt 更多信息 + +### 概念组合 + +![赛博朋克中国山水园林](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/赛博朋克中国山水园林.jpg) + +## ShowCase + +更多 ShowCase 和创意 Prompt,可以参考我的[社交账号](#关注我) 或者是 http://youpromptme.cn/#/gallery/ (建设中) + +### 故障艺术 + +| ![076_时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术_004-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/076_时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术_004-1.jpg) | ![024_巨大的纯白色城堡-油画,故障艺术_005-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/024_巨大的纯白色城堡-油画,故障艺术_005-1.jpg) | ![065_Yggdrasil,世界树和地球融合在一起,故障艺术_009](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/065_Yggdrasil,世界树和地球融合在一起,故障艺术_009.jpg) | ![106_在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术_005](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/106_在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术_005.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| _时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术 | 巨大的纯白色城堡-油画,故障艺术 | Yggdrasil,世界树和地球融合在一起,故障艺术 | 在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术 | + + + +### 蒸汽波艺术 + +| ![185_荒岛,蒸汽波艺术_000-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/185_荒岛,蒸汽波艺术_000-1.jpg) 
| ![060_Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术_007](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/060_Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术_007.jpg) | ![戴着眼镜的猫,蒸汽波艺术, vaporwave art 02](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/戴着眼镜的猫,蒸汽波艺术,vaporwaveart02.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| 荒岛,蒸汽波艺术 | Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术 | 戴着眼镜的猫,蒸汽波艺术 | + + +### 包豪斯艺术 + +| ![007_一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯_002](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/007_一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯_002.jpg) | ![033_梵高猫头鹰,包豪斯_000](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/033_梵高猫头鹰,包豪斯_000.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯 | 梵高猫头鹰,包豪斯 | + + + + + +### 概念艺术 + +| ![079_4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,故障艺术_004](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/079_4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,故障艺术_004.jpg) | ![043_4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,故障艺术_003](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/043_4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,故障艺术_003.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,概念艺术 | 4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,概念艺术 | + + + +### 像素艺术 + +| ![pixel1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel1.jpg) | ![pixel2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel2.jpg) | ![pixel3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel3.jpg) | ![pixel4](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel4.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | + + + +### 艺术家 + +| ![001_萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格_006](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/001_萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格_006.jpg) | ![033_梵高猫头鹰,蒸汽波艺术_001](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/033_梵高猫头鹰,蒸汽波艺术_001.jpg) | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| 萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格 | 梵高猫头鹰,蒸汽波艺术 | + + + + +## 附录 + +### 常见的艺术家和艺术风格整理 + +| 艺术类型 | 艺术家 | 常用艺术风格 | +| ---------- | ---------------------- | ---------------------- | +| 肖像画 | 文森特·梵高 | 印象主义 | +| 风景画 | 尼古拉斯·罗伊里奇 | 现实主义 | +| 风俗画 | 皮埃尔-奥古斯特·雷诺阿 | 浪漫主义 | +| 宗教绘画 | 克劳德·莫内 | 表现主义 | +| 抽象画 | 彼得·孔查洛夫斯基 | 后印象主义 | +| 都市风景画 | 卡米尔·毕沙罗 | 象征主义 | +| 素描与草图 | 约翰·辛格·萨金特 | 新艺术主义 | +| 静物 | 伦勃朗 | 巴洛克风格 | +| 裸体画 | 马克·夏加尔 | 抽象表现主义 | +| 插画 | 巴勃罗·毕加索 | 北欧文艺复兴 | +| | 古斯塔夫·多雷 | 素人艺术,原始主义 | +| | 阿尔布雷特·丢勒 | 立体主义 | +| | 鲍里斯·库斯妥基耶夫 | 洛可可 | +| | 埃德加·德加 | 色域绘画 | +| | | 波普艺术 | +| | | 文艺复兴开端 | +| | | 文艺复兴全盛期 | +| | | 极简主义 | +| | | 矫饰主义,文艺复兴晚期 | + + + +### 常见的摄影风格词整理 + +| 可以加入到 Prompt 中的摄影词 | | +| ---------------------------- 
| ------------ | +| 浅景深 | 仰拍 | +| 负像 | 动态模糊 | +| 微距 | 高反差 | +| 双色版 | 中心构图 | +| 角度 | 逆光 | +| 三分法 | 长曝光 | +| 抓拍 | 禅宗摄影 | +| 软焦点 | 抽象微距镜头 | +| 黑白 | 暗色调 | +| 无镜反射 | 长时间曝光 | +| 双色调 | 框架,取景 | +| 颗粒图像 | | + + + + +### 相关链接 + +美学相关的词汇: https://aesthetics.fandom.com/wiki/List_of_Aesthetics + +DALL-E 2 的 Prompt 技巧资料:https://docs.google.com/document/d/11WlzjBT0xRpQhP9tFMtxzd0q6ANIdHPUBkMV-YB043U/edit + +DiscoDiffusion Prompt 技巧资料:https://docs.google.com/document/d/1l8s7uS2dGqjztYSjPpzlmXLjl5PM3IGkRWI3IiCuK7g/edit diff --git a/modules/image/text_to_image/ernie_vilg/module.py b/modules/image/text_to_image/ernie_vilg/module.py index dad3c9833..38ed6b9d4 100755 --- a/modules/image/text_to_image/ernie_vilg/module.py +++ b/modules/image/text_to_image/ernie_vilg/module.py @@ -66,13 +66,15 @@ def generate_image(self, text_prompts, style: Optional[str] = "油画", topk: Optional[int] = 10, + visualization: Optional[bool] = True, output_dir: Optional[str] = 'ernievilg_output'): """ Create image by text prompts using ErnieVilG model. :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. - :param style: Image stype, currently supported 油画、水彩、粉笔画、卡通、儿童画、蜡笔画 + :param style: Image stype, currently supported 油画、水彩、粉笔画、卡通、儿童画、蜡笔画、探索无限。 :param topk: Top k images to save. + :param visualization: Whether to save images or not. :output_dir: Output directory """ if not os.path.exists(output_dir): @@ -186,7 +188,8 @@ def generate_image(self, for text, data in results.items(): for idx, imgdata in enumerate(data['imgUrls']): image = Image.open(BytesIO(requests.get(imgdata['image']).content)) - image.save(os.path.join(output_dir, '{}_{}.png'.format(text, idx))) + if visualization: + image.save(os.path.join(output_dir, '{}_{}.png'.format(text, idx))) result_images.append(image) if idx + 1 >= topk: break @@ -212,6 +215,7 @@ def run_cmd(self, argvs): results = self.generate_image(text_prompts=args.text_prompts, style=args.style, topk=args.topk, + visualization=args.visualization, output_dir=args.output_dir) return results @@ -237,9 +241,10 @@ def add_module_input_arg(self): self.arg_input_group.add_argument('--style', type=str, default='油画', - choices=['油画', '水彩', '粉笔画', '卡通', '儿童画', '蜡笔画'], + choices=['油画', '水彩', '粉笔画', '卡通', '儿童画', '蜡笔画', '探索无限'], help="绘画风格") self.arg_input_group.add_argument('--topk', type=int, default=10, help="选取保存前多少张图,最多10张") self.arg_input_group.add_argument('--ak', type=str, default=None, help="申请文心api使用token的ak") self.arg_input_group.add_argument('--sk', type=str, default=None, help="申请文心api使用token的sk") + self.arg_input_group.add_argument('--visualization', type=bool, default=True, help="是否保存生成的图片") self.arg_input_group.add_argument('--output_dir', type=str, default='ernievilg_output') From 752bd02fd490111ca68b6b11320f9f2f78afc1ce Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 31 Aug 2022 17:27:24 +0800 Subject: [PATCH 038/117] Optimize ernie vilg document (#1972) * add content table for readme * optimize error prompt * fix --- .../image/text_to_image/ernie_vilg/README.md | 115 ++++++++++-------- .../image/text_to_image/ernie_vilg/module.py | 38 ++++-- 2 files changed, 87 insertions(+), 66 deletions(-) diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md index 74bd920e5..34016b396 100755 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -3,6 +3,17 @@ # PaddleHub ERNIE-ViLG +# 目录 +1. [模型基本信息](#一模型基本信息) +2. [安装](#二安装) +3. 
[模型API预测](#三模型api预测) +4. [Prompt 指南](#四-prompt-指南) +5. [服务部署](#五服务部署) +6. [更新历史](#六更新历史) + + +## 一、模型基本信息 + |模型名称|ernie_vilg| | :--- | :---: | |类别|图像-文图生成| @@ -13,8 +24,6 @@ |最新更新日期|2022-08-02| |数据指标|-| -## 一、模型基本信息 - ### 应用效果展示 - 输入文本 "戴眼镜的猫" 风格 "油画" @@ -87,59 +96,8 @@ - **返回** - images(List(PIL.Image)): 返回生成的所有图像列表,PIL的Image格式。 -## 四、服务部署 - -- PaddleHub Serving可以部署一个在线文图生成服务。 - -- ### 第一步:启动PaddleHub Serving - - - 运行启动命令: - - ```shell - $ hub serving start -m ernie_vilg - ``` - - - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 - - - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -- ### 第二步:发送预测请求 - - - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果。 - - - ```python - import requests - import json - import cv2 - import base64 - from io import BytesIO - from PIL import Image - - # 发送HTTP请求 - data = {'text_prompts': '巨大的白色城堡'} - headers = {"Content-type": "application/json"} - url = "http://127.0.0.1:8866/predict/ernie_vilg" - r = requests.post(url=url, headers=headers, data=json.dumps(data)) - - # 获取返回结果 - for i, result in enumerate(r.json()["results"]): - image = Image.open(BytesIO(base64.b64decode(result))) - image.save('result_{}.png'.format(i)) - - -## 五、更新历史 - -* 1.0.0 - - 初始发布 - - ```shell - $ hub install ernie_vilg == 1.0.0 - ``` - - - -## 六、 Prompt 指南 +## 四、 Prompt 指南 @@ -785,3 +743,52 @@ Prompt 的主体可以是千奇百怪、各种各样的。这里我挑了几个 DALL-E 2 的 Prompt 技巧资料:https://docs.google.com/document/d/11WlzjBT0xRpQhP9tFMtxzd0q6ANIdHPUBkMV-YB043U/edit DiscoDiffusion Prompt 技巧资料:https://docs.google.com/document/d/1l8s7uS2dGqjztYSjPpzlmXLjl5PM3IGkRWI3IiCuK7g/edit + +## 五、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m ernie_vilg + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果。 + + - ```python + import requests + import json + import cv2 + import base64 + from io import BytesIO + from PIL import Image + + # 发送HTTP请求 + data = {'text_prompts': '巨大的白色城堡'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ernie_vilg" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + for i, result in enumerate(r.json()["results"]): + image = Image.open(BytesIO(base64.b64decode(result))) + image.save('result_{}.png'.format(i)) + + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install ernie_vilg == 1.0.0 + ``` diff --git a/modules/image/text_to_image/ernie_vilg/module.py b/modules/image/text_to_image/ernie_vilg/module.py index 38ed6b9d4..f812c60cc 100755 --- a/modules/image/text_to_image/ernie_vilg/module.py +++ b/modules/image/text_to_image/ernie_vilg/module.py @@ -65,7 +65,7 @@ def _apply_token(self, ak, sk): def generate_image(self, text_prompts, style: Optional[str] = "油画", - topk: Optional[int] = 10, + topk: Optional[int] = 6, visualization: Optional[bool] = True, output_dir: Optional[str] = 'ernievilg_output'): """ @@ -119,8 +119,11 @@ def generate_image(self, if res['code'] != 0: print("Token失效重新请求后依然发生错误,请检查输入的参数") raise RuntimeError("Token失效重新请求后依然发生错误,请检查输入的参数") - - taskids.append(res['data']["taskId"]) + if res['msg'] == 'success': + taskids.append(res['data']["taskId"]) + else: + print(res['msg']) + raise RuntimeError(res['msg']) start_time = time.time() process_bar = tqdm(total=100, unit='%') @@ -163,13 +166,17 @@ def generate_image(self, if res['code'] != 0: print("Token失效重新请求后依然发生错误,请检查输入的参数") raise 
RuntimeError("Token失效重新请求后依然发生错误,请检查输入的参数") - if res['data']['status'] == 1: - has_done.append(res['data']['taskId']) - results[res['data']['text']] = { - 'imgUrls': res['data']['imgUrls'], - 'waiting': res['data']['waiting'], - 'taskId': res['data']['taskId'] - } + if res['msg'] == 'success': + if res['data']['status'] == 1: + has_done.append(res['data']['taskId']) + results[res['data']['text']] = { + 'imgUrls': res['data']['imgUrls'], + 'waiting': res['data']['waiting'], + 'taskId': res['data']['taskId'] + } + else: + print(res['msg']) + raise RuntimeError(res['msg']) total_time = int(re.match('[0-9]+', str(res['data']['waiting'])).group(0)) * 60 end_time = time.time() progress_rate = int(((end_time - start_time) / total_time * 100)) if total_time != 0 else 100 @@ -187,7 +194,14 @@ def generate_image(self, result_images = [] for text, data in results.items(): for idx, imgdata in enumerate(data['imgUrls']): - image = Image.open(BytesIO(requests.get(imgdata['image']).content)) + try: + image = Image.open(BytesIO(requests.get(imgdata['image']).content)) + except Exception as e: + print('Download generated images error, retry one time') + try: + image = Image.open(BytesIO(requests.get(imgdata['image']).content)) + except Exception: + raise RuntimeError('Download generated images failed.') if visualization: image.save(os.path.join(output_dir, '{}_{}.png'.format(text, idx))) result_images.append(image) @@ -243,7 +257,7 @@ def add_module_input_arg(self): default='油画', choices=['油画', '水彩', '粉笔画', '卡通', '儿童画', '蜡笔画', '探索无限'], help="绘画风格") - self.arg_input_group.add_argument('--topk', type=int, default=10, help="选取保存前多少张图,最多10张") + self.arg_input_group.add_argument('--topk', type=int, default=6, help="选取保存前多少张图,最多10张") self.arg_input_group.add_argument('--ak', type=str, default=None, help="申请文心api使用token的ak") self.arg_input_group.add_argument('--sk', type=str, default=None, help="申请文心api使用token的sk") self.arg_input_group.add_argument('--visualization', type=bool, default=True, help="是否保存生成的图片") From c0c14fc6379539d3a3269127a092300183cdeb3a Mon Sep 17 00:00:00 2001 From: OleNet Date: Fri, 2 Sep 2022 00:02:02 +0800 Subject: [PATCH 039/117] Update Readme For Module ERNIE-ViLG Readme (#1980) * Update README.md * Update README.md --- modules/image/text_to_image/ernie_vilg/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md index 34016b396..823d859b1 100755 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -100,6 +100,14 @@ ## 四、 Prompt 指南 +(在 GitHub 阅读 README 的用户可以访问 www.youpromptme.cn 获得更好的阅读体验) + +(图片比较多,完全加载页面可能需要 3min) + +作者:佳祥 (LCL-Brew) + +原文地址: https://github.com/OleNet/YouPromptMe/tree/gh-pages/you-prompt-me + 这是一份如何调整 Prompt 得到更漂亮的图片的经验性文档。我们的结果和经验都来源于[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 和[社区的资料](#related-work)。 From 1007c416b131801d80d8807aad03af8ec4466767 Mon Sep 17 00:00:00 2001 From: DanielYang Date: Fri, 2 Sep 2022 10:17:48 +0800 Subject: [PATCH 040/117] Update README.md --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index fba466927..256d4780e 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ English | [简体中文](README_ch.md) -## Introduction and Features +## ⭐Introduction - **PaddleHub** aims to provide developers with rich, high-quality, and directly usable pre-trained models. 
- **Abundant Pre-trained Models**: 360+ pre-trained models cover the 6 major categories, including Wenxin large models, Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. - **No Need for Deep Learning Background**: you can use AI models quickly and enjoy the dividends of the artificial intelligence era. @@ -36,28 +36,28 @@ English | [简体中文](README_ch.md) - **Easy-to-use Transfer Learning**: few lines of codes to complete the transfer-learning task such as image classification and text classification based on high quality pre-trained models. - **Cross-platform**: support Linux, Windows, MacOS and other operating systems. -### Recent updates +### 💥Recent Updates - **🔥2022.08.19:** The v2.3.0 version is released, supports Wenxin large models and five text-to-image models based on disco diffusion(dd). - Support [Wenxin large models API](https://wenxin.baidu.com/moduleApi) for Baidu ERNIE large-scale pre-trained model, including [**ERNIE-ViLG** model](https://aistudio.baidu.com/aistudio/projectdetail/4445016), which supports text-to-image task, and [**ERNIE 3.0 Zeus**](https://aistudio.baidu.com/aistudio/projectdetail/4445054) model, which supports applications such as writing essays, summarization, couplets, question answering, writing novels and completing text. - Add five text-to-image domain models based on disco diffusion(dd), three for [English](https://aistudio.baidu.com/aistudio/projectdetail/4444984) and two for Chinese. Welcome to enjoy our **ERNIE-ViL**-based Chinese text-to-image module [disco_diffusion_ernievil_base](https://aistudio.baidu.com/aistudio/projectdetail/4444998) in aistudio. - **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) -- **🔥2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. +- **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. - [【more】](./docs/docs_en/release.md) -## Visualization Demo [[More]](./docs/docs_en/visualization.md) [[ModelList]](./modules) +## 🌈Visualization Demo -### **[Wenxin large models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage)** +#### 🏜️ [Text-to-Image Models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage) - Include ERNIE-ViL、ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text.

-### **[Computer Vision (212 models)](./modules#Image)** +#### 👓 [Computer Vision Models](./modules#Image)
@@ -66,7 +66,7 @@ English | [简体中文](README_ch.md) - Many thanks to CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)、[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN)、[AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)、[Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) for the pre-trained models, you can try to train your models with them. -### **[Natural Language Processing (130 models)](./modules#Text)** +#### 🎤 [Natural Language Processing Models](./modules#Text)
@@ -75,7 +75,7 @@ English | [简体中文](README_ch.md) -### [Speech (15 models)](./modules#Audio) +#### 🎧 [Speech Models](./modules#Audio) - ASR speech recognition algorithm, multiple algorithms are available. - The speech recognition effect is as follows:
@@ -138,7 +138,7 @@ English | [简体中文](README_ch.md) - Many thanks to CopyRight@[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech) for the pre-trained models, you can try to train your models with PaddleSpeech. -### [Video (8 models)](./modules#Video) +#### 📽️ [Video Models](./modules#Video) - Short video classification trained via large-scale video datasets, supports 3000+ tag types prediction for short Form Videos. - Many thanks to CopyRight@[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) for the pre-trained model, you can try to train your models with PaddleVideo. - `Example: Input a short video of swimming, the algorithm can output the result of "swimming"` @@ -146,8 +146,8 @@ English | [简体中文](README_ch.md)
-## ===**Key Points**=== -- All the above pre-trained models are all open source and free, and the number of models is continuously updated. Welcome **⭐Star⭐** to pay attention. +### ⭐ Thanks for Your Star ⭐ +- All the above pre-trained models are all **open source and free**, and the number of models is continuously updated. Welcome **⭐Star⭐** to pay attention.
@@ -155,18 +155,18 @@ English | [简体中文](README_ch.md) -## Welcome to join PaddleHub technical group +## 🍻Welcome to join PaddleHub technical group -If you have any questions during the use of the model, you can join the official WeChat group to get more efficient questions and answers, and fully communicate with developers from all walks of life. We look forward to your joining. +- If you have any questions during the use of the model, you can join the official WeChat group to get more efficient questions and answers, and fully communicate with developers from all walks of life. We look forward to your joining.
-please add WeChat above and send "Hub" to the robot, the robot will invite you to join the group automatically. +- please add WeChat above and send "Hub" to the robot, the robot will invite you to join the group automatically. -## QuickStart +## ✈️QuickStart -### The installation of required components. +#### 🚁The installation of required components. ```python # install paddlepaddle with gpu # !pip install --upgrade paddlepaddle-gpu @@ -178,7 +178,7 @@ please add WeChat above and send "Hub" to the robot, the robot will invite you t !pip install --upgrade paddlehub ``` -### The simplest cases of Chinese word segmentation. +#### 🛫The simplest cases of Chinese word segmentation. ```python import paddlehub as hub @@ -190,23 +190,23 @@ results = lac.cut(text=test_text, use_gpu=False, batch_size=1, return_tag=True) print(results) #{'word': ['今天', '是', '个', '好天气', '。'], 'tag': ['TIME', 'v', 'q', 'n', 'w']} ``` -### The simplest command of deploying lac service. +#### 🛰️The simplest command of deploying lac service.
```python !hub serving start -m lac ``` -More model description, please refer [Models List](https://www.paddlepaddle.org.cn/hublist) +- 📣More model description, please refer [Models List](https://www.paddlepaddle.org.cn/hublist) -More API for transfer learning, please refer [Tutorial](https://paddlehub.readthedocs.io/en/release-v2.1/transfer_learning_index.html) +- 📣More API for transfer learning, please refer [Tutorial](https://paddlehub.readthedocs.io/en/release-v2.1/transfer_learning_index.html) -## License +## 📚License The release of this project is certified by the Apache 2.0 license. -## Contribution +## 👨‍👨‍👧‍👦Contribution

From a6790a651a12eb391060e533868bf0ba197f6f7e Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 2 Sep 2022 10:41:24 +0800 Subject: [PATCH 041/117] Add stable diffusion module --- .../text_to_image/stable_diffusion/README.md | 160 ++ .../stable_diffusion/clip/README.md | 2 + .../stable_diffusion/clip/clip/__init__.py | 1 + .../stable_diffusion/clip/clip/layers.py | 182 +++ .../stable_diffusion/clip/clip/model.py | 259 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../stable_diffusion/clip/clip/utils.py | 88 + .../stable_diffusion/diffusers/__init__.py | 20 + .../diffusers/configuration_utils.py | 312 ++++ .../diffusers/models/README.md | 11 + .../diffusers/models/__init__.py | 20 + .../diffusers/models/attention.py | 465 ++++++ .../diffusers/models/embeddings.py | 116 ++ .../diffusers/models/resnet.py | 515 ++++++ .../diffusers/models/unet_2d.py | 206 +++ .../diffusers/models/unet_2d_condition.py | 206 +++ .../diffusers/models/unet_blocks.py | 1428 +++++++++++++++++ .../stable_diffusion/diffusers/models/vae.py | 465 ++++++ .../diffusers/schedulers/README.md | 18 + .../diffusers/schedulers/__init__.py | 24 + .../diffusers/schedulers/scheduling_ddim.py | 182 +++ .../diffusers/schedulers/scheduling_ddpm.py | 191 +++ .../schedulers/scheduling_karras_ve.py | 124 ++ .../schedulers/scheduling_lms_discrete.py | 133 ++ .../diffusers/schedulers/scheduling_pndm.py | 258 +++ .../diffusers/schedulers/scheduling_sde_ve.py | 172 ++ .../diffusers/schedulers/scheduling_sde_vp.py | 59 + .../diffusers/schedulers/scheduling_utils.py | 102 ++ .../text_to_image/stable_diffusion/module.py | 367 +++++ .../stable_diffusion/requirements.txt | 8 + 30 files changed, 6229 insertions(+) create mode 100644 modules/image/text_to_image/stable_diffusion/README.md create mode 100755 modules/image/text_to_image/stable_diffusion/clip/README.md create mode 100755 modules/image/text_to_image/stable_diffusion/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/stable_diffusion/clip/clip/layers.py create mode 100755 modules/image/text_to_image/stable_diffusion/clip/clip/model.py create mode 100755 modules/image/text_to_image/stable_diffusion/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/stable_diffusion/clip/clip/utils.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/configuration_utils.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/README.md create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/attention.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/embeddings.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/resnet.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d_condition.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/unet_blocks.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/models/vae.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/README.md create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/__init__.py create mode 100644 
modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddim.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddpm.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_karras_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_lms_discrete.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_pndm.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_vp.py create mode 100644 modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_utils.py create mode 100755 modules/image/text_to_image/stable_diffusion/module.py create mode 100644 modules/image/text_to_image/stable_diffusion/requirements.txt diff --git a/modules/image/text_to_image/stable_diffusion/README.md b/modules/image/text_to_image/stable_diffusion/README.md new file mode 100644 index 000000000..a3afa9793 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/README.md @@ -0,0 +1,160 @@ +# stable_diffusion + +|模型名称|stable_diffusion| +| :--- | :---: | +|类别|多模态-文图生成| +|网络|CLIP Text Encoder+UNet+VAD| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|4.0GB| +|最新更新日期|2022-08-26| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation." + + - 输出图像 +

+ +
+ + - 生成过程 +

+ +
+ +### 模型介绍 + +Stable Diffusion是一种潜在扩散模型(Latent Diffusion), 属于生成类模型,这类模型通过对随机噪声进行一步步地迭代降噪并采样来获得感兴趣的图像,当前取得了令人惊艳的效果。相比于Disco Diffusion, Stable Diffusion通过在低纬度的潜在空间(lower dimensional latent space)而不是原像素空间来做迭代,极大地降低了内存和计算量的需求,并且在V100上一分钟之内即可以渲染出想要的图像,欢迎体验。 + +更多详情请参考论文:[High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stable_diffusion + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run stable_diffusion --text_prompts "in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation." --output_dir stable_diffusion_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="stable_diffusion") + text_prompts = ["in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation."] + # 生成图像, 默认会在stable_diffusion_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + # 您可以设置batch_size一次生成多张 + da = module.generate_image(text_prompts=text_prompts, batch_size=3, output_dir='./stable_diffusion_out/') + # 展示所有的中间结果 + da[0].chunks[-1].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks[-1].chunks.save_gif('stable_diffusion_out-merged-result.gif') + # da索引的是prompt, da[0].chunks索引的是该prompt下生成的第一张图,在batch_size不为1时能同时生成多张图 + # 您也可以按照上述操作显示单张图,如第0张的生成过程 + da[0].chunks[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + da[0].chunks[0].chunks.save_gif('stable_diffusion_out-image-0-result.gif') + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [512, 512], + seed: Optional[int] = None, + batch_size: Optional[int] = 1, + output_dir: Optional[str] = 'stable_diffusion_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。通常比较有效的构造方式为 "一段描述性的文字内容" + "指定艺术家的名字",如"in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation."。prompt的构造可以参考[网站](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#)。 + - style(Optional[str]): 指定绘画的风格,如'watercolor','Chinese painting'等。当不指定时,风格完全由您所填写的prompt决定。 + - artist(Optional[str]): 指定特定的艺术家,如Greg Rutkowsk、krenz,将会生成所指定艺术家的绘画风格。当不指定时,风格完全由您所填写的prompt决定。各种艺术家的风格可以参考[网站](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/)。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - batch_size(Optional[int]): 指定每个prompt一次生成的图像的数量。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"stable_diffusion_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`batch_size`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + 
+- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m stable_diffusion + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # 发送HTTP请求 + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stable_diffusion" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + r.json()["results"] + da = DocumentArray.from_base64(r.json()["results"]) + # 保存结果图 + da[0].save_uri_to_file('stable_diffusion_out.png') + # 将生成过程保存为一个动态图gif + da[0].chunks[0].chunks.save_gif('stable_diffusion_out.gif') + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install stable_diffusion == 1.0.0 + ``` diff --git a/modules/image/text_to_image/stable_diffusion/clip/README.md b/modules/image/text_to_image/stable_diffusion/clip/README.md new file mode 100755 index 000000000..9944794f8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We use this repo here for text encoder in stable diffusion. diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/__init__.py b/modules/image/text_to_image/stable_diffusion/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/layers.py b/modules/image/text_to_image/stable_diffusion/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = 
atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class 
QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/model.py b/modules/image/text_to_image/stable_diffusion/clip/clip/model.py new file mode 100755 index 000000000..06affcc4b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/model.py @@ -0,0 +1,259 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = 
self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class TextTransformer(nn.Layer): + + def __init__(self, context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, + transformer_layers: int): + super().__init__() + self.context_length = context_length + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def forward(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width 
* 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/stable_diffusion/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 
+ This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). + """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py new file mode 100755 index 
000000000..b5d417144 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py @@ -0,0 +1,88 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .model import TextTransformer +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['VITL14'] + +URL = {'VITL14': os.path.join(os.path.dirname(__file__), 'pre_trained', 'vitl14_textencoder.pdparams')} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='VITL14'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'VITL14': build_vitl14_language_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + state_dict = model.state_dict() + for key, value in sd.items(): + if key in state_dict: + state_dict[key] = value + model.load_dict(state_dict) + model.eval() + return model + + +def build_vitl14_language_model(): + model = TextTransformer(context_length=77, + vocab_size=49408, + transformer_width=768, + transformer_heads=12, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/__init__.py b/modules/image/text_to_image/stable_diffusion/diffusers/__init__.py new file mode 100644 index 000000000..7f41816d7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.2.4" + +from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel + +from .schedulers import (DDIMScheduler, DDPMScheduler, KarrasVeScheduler, PNDMScheduler, SchedulerMixin, + ScoreSdeVeScheduler, LMSDiscreteScheduler) diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/configuration_utils.py b/modules/image/text_to_image/stable_diffusion/diffusers/configuration_utils.py new file mode 100644 index 000000000..c90ebd5be --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/configuration_utils.py @@ -0,0 +1,312 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" ConfigMixinuration base class and utilities.""" +import functools +import inspect +import json +import os +import re +from collections import OrderedDict +from typing import Any +from typing import Dict +from typing import Tuple +from typing import Union + +from requests import HTTPError + +from paddlehub.common.logger import logger + +HUGGINGFACE_CO_RESOLVE_ENDPOINT = "HUGGINGFACE_CO_RESOLVE_ENDPOINT" +DIFFUSERS_CACHE = "./caches" + +_re_configuration_file = re.compile(r"config\.(.*)\.json") + + +class ConfigMixin: + r""" + Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as + methods for loading/downloading/saving configurations. + + """ + config_name = "model_config.json" + ignore_for_config = [] + + def register_to_config(self, **kwargs): + if self.config_name is None: + raise NotImplementedError(f"Make sure that {self.__class__} has defined a class name `config_name`") + kwargs["_class_name"] = self.__class__.__name__ + kwargs["_diffusers_version"] = "0.0.1" + + for key, value in kwargs.items(): + try: + setattr(self, key, value) + except AttributeError as err: + logger.error(f"Can't set {key} with value {value} for {self}") + raise err + + if not hasattr(self, "_internal_dict"): + internal_dict = kwargs + else: + previous_dict = dict(self._internal_dict) + internal_dict = {**self._internal_dict, **kwargs} + logger.debug(f"Updating config from {previous_dict} to {internal_dict}") + + self._internal_dict = FrozenDict(internal_dict) + + def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): + """ + Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the + [`~ConfigMixin.from_config`] class method. 
+ + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + kwargs: + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + os.makedirs(save_directory, exist_ok=True) + + # If we save using the predefined names, we can load using `from_config` + output_config_file = os.path.join(save_directory, self.config_name) + + self.to_json_file(output_config_file) + logger.info(f"ConfigMixinuration saved in {output_config_file}") + + @classmethod + def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + + model = cls(**init_dict) + + if return_unused_kwargs: + return model, unused_kwargs + else: + return model + + @classmethod + def get_config_dict(cls, pretrained_model_name_or_path: Union[str, os.PathLike], + **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + user_agent = {"file_type": "config"} + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if cls.config_name is None: + raise ValueError( + "`self.config_name` is not defined. Note that one should not load a config from " + "`ConfigMixin`. 
Please make sure to define `config_name` in a class inheriting from `ConfigMixin`")
+
+        if os.path.isfile(pretrained_model_name_or_path):
+            config_file = pretrained_model_name_or_path
+        elif os.path.isdir(pretrained_model_name_or_path):
+            if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)):
+                # Load from a PyTorch checkpoint
+                config_file = os.path.join(pretrained_model_name_or_path, cls.config_name)
+            elif subfolder is not None and os.path.isfile(
+                    os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name)):
+                config_file = os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name)
+            else:
+                raise EnvironmentError(
+                    f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}.")
+        else:
+            try:
+                # Load from URL or cache if already cached
+                from huggingface_hub import hf_hub_download
+                config_file = hf_hub_download(
+                    pretrained_model_name_or_path,
+                    filename=cls.config_name,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    local_files_only=local_files_only,
+                    use_auth_token=use_auth_token,
+                    user_agent=user_agent,
+                    subfolder=subfolder,
+                )
+
+            except HTTPError as err:
+                raise EnvironmentError("There was a specific connection error when trying to load"
+                                       f" {pretrained_model_name_or_path}:\n{err}")
+            except ValueError:
+                raise EnvironmentError(
+                    f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
+                    f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
+                    f" directory containing a {cls.config_name} file.\nCheck your internet connection or see how to"
+                    " run the library in offline mode at"
+                    " 'https://huggingface.co/docs/diffusers/installation#offline-mode'.")
+            except EnvironmentError:
+                raise EnvironmentError(
+                    f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from "
+                    "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
+                    f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
+                    f"containing a {cls.config_name} file")
+
+        try:
+            # Load config dict
+            config_dict = cls._dict_from_json_file(config_file)
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.")
+
+        return config_dict
+
+    @classmethod
+    def extract_init_dict(cls, config_dict, **kwargs):
+        expected_keys = set(dict(inspect.signature(cls.__init__).parameters).keys())
+        expected_keys.remove("self")
+        # remove general kwargs if present in dict
+        if "kwargs" in expected_keys:
+            expected_keys.remove("kwargs")
+        # remove keys to be ignored
+        if len(cls.ignore_for_config) > 0:
+            expected_keys = expected_keys - set(cls.ignore_for_config)
+        init_dict = {}
+        for key in expected_keys:
+            if key in kwargs:
+                # overwrite key
+                init_dict[key] = kwargs.pop(key)
+            elif key in config_dict:
+                # use value from config dict
+                init_dict[key] = config_dict.pop(key)
+
+        # `dict.update` returns None, so merge the leftover config values and kwargs explicitly
+        unused_kwargs = {**config_dict, **kwargs}
+
+        passed_keys = set(init_dict.keys())
+        if len(expected_keys - passed_keys) > 0:
+            logger.warning(
+                f"{expected_keys - passed_keys} was not found in config.
Values will be initialized to default values.") + + return init_dict, unused_kwargs + + @classmethod + def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]): + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + return json.loads(text) + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + @property + def config(self) -> Dict[str, Any]: + return self._internal_dict + + def to_json_string(self) -> str: + """ + Serializes this instance to a JSON string. + + Returns: + `str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path: Union[str, os.PathLike]): + """ + Save this instance to a JSON file. + + Args: + json_file_path (`str` or `os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. + """ + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) + + +class FrozenDict(OrderedDict): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + for key, value in self.items(): + setattr(self, key, value) + + self.__frozen = True + + def __delitem__(self, *args, **kwargs): + raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") + + def setdefault(self, *args, **kwargs): + raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") + + def pop(self, *args, **kwargs): + raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") + + def update(self, *args, **kwargs): + raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") + + def __setattr__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setattr__(name, value) + + def __setitem__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setitem__(name, value) + + +def register_to_config(init): + """ + Decorator to apply on the init of classes inheriting from `ConfigMixin` so that all the arguments are automatically + sent to `self.register_for_config`. To ignore a specific argument accepted by the init but that shouldn't be + registered in the config, use the `ignore_for_config` class variable + + Warning: Once decorated, all private arguments (beginning with an underscore) are trashed and not sent to the init! + """ + + @functools.wraps(init) + def inner_init(self, *args, **kwargs): + # Ignore private kwargs in the init. 
+ init_kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_")} + init(self, *args, **init_kwargs) + if not isinstance(self, ConfigMixin): + raise RuntimeError( + f"`@register_for_config` was applied to {self.__class__.__name__} init method, but this class does " + "not inherit from `ConfigMixin`.") + + ignore = getattr(self, "ignore_for_config", []) + # Get positional arguments aligned with kwargs + new_kwargs = {} + signature = inspect.signature(init) + parameters = { + name: p.default + for i, (name, p) in enumerate(signature.parameters.items()) if i > 0 and name not in ignore + } + for arg, name in zip(args, parameters.keys()): + new_kwargs[name] = arg + + # Then add all kwargs + new_kwargs.update({ + k: init_kwargs.get(k, default) + for k, default in parameters.items() if k not in ignore and k not in new_kwargs + }) + getattr(self, "register_to_config")(**new_kwargs) + + return inner_init diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/README.md b/modules/image/text_to_image/stable_diffusion/diffusers/models/README.md new file mode 100644 index 000000000..e786fe518 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/README.md @@ -0,0 +1,11 @@ +# Models + +- Models: Neural network that models $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ (see image below) and is trained end-to-end to denoise a noisy input to an image. Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet + +## API + +TODO(Suraj, Patrick) + +## Examples + +TODO(Suraj, Patrick) diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/__init__.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/__init__.py new file mode 100644 index 000000000..f55cc88a8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/__init__.py @@ -0,0 +1,20 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .unet_2d import UNet2DModel +from .unet_2d_condition import UNet2DConditionModel +from .vae import AutoencoderKL +from .vae import VQModel diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/attention.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/attention.py new file mode 100644 index 000000000..29d0e73a7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/attention.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from inspect import isfunction + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def finfo(dtype): + if dtype == paddle.float32: + return np.finfo(np.float32) + if dtype == paddle.float16: + return np.finfo(np.float16) + if dtype == paddle.float64: + return np.finfo(np.float64) + + +paddle.finfo = finfo + + +class AttentionBlockNew(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. Originally ported from here, but adapted + to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + Uses three q, k, v linear layers to compute attention + """ + + def __init__( + self, + channels, + num_head_channels=None, + num_groups=32, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + + self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 + self.num_head_size = num_head_channels + self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + + # define q,k,v as linear layers + self.query = nn.Linear(channels, channels) + self.key = nn.Linear(channels, channels) + self.value = nn.Linear(channels, channels) + + self.rescale_output_factor = rescale_output_factor + self.proj_attn = nn.Linear(channels, channels) + + def transpose_for_scores(self, projection: paddle.Tensor) -> paddle.Tensor: + new_projection_shape = projection.shape[:-1] + [self.num_heads, -1] + # move heads to 2nd position (B, T, H * D) -> (B, T, H, D) -> (B, H, T, D) + new_projection = projection.reshape(new_projection_shape).transpose([0, 2, 1, 3]) + return new_projection + + def forward(self, hidden_states): + residual = hidden_states + batch, channel, height, width = hidden_states.shape + + # norm + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.reshape([batch, channel, height * width]).transpose([0, 2, 1]) + + # proj to q, k, v + query_proj = self.query(hidden_states) + key_proj = self.key(hidden_states) + value_proj = self.value(hidden_states) + + # transpose + query_states = self.transpose_for_scores(query_proj) + key_states = self.transpose_for_scores(key_proj) + value_states = self.transpose_for_scores(value_proj) + + # get scores + scale = 1 / math.sqrt(math.sqrt(self.channels / self.num_heads)) + attention_scores = paddle.matmul(query_states * scale, key_states * scale, transpose_y=True) + attention_probs = F.softmax(attention_scores.astype("float32"), axis=-1).astype(attention_scores.dtype) + + # compute attention output + context_states = paddle.matmul(attention_probs, value_states) + + context_states = context_states.transpose([0, 2, 1, 3]) + new_context_states_shape = context_states.shape[:-2] + [ + self.channels, + ] + context_states = context_states.reshape(new_context_states_shape) + + # compute next hidden_states + hidden_states = self.proj_attn(context_states) + hidden_states = hidden_states.transpose([0, 2, 1]).reshape([batch, channel, height, width]) + + # res connect and rescale + 
hidden_states = (hidden_states + residual) / self.rescale_output_factor + return hidden_states + + def set_weight(self, attn_layer): + self.group_norm.weight.set_value(attn_layer.norm.weight) + self.group_norm.bias.set_value(attn_layer.norm.bias) + + if hasattr(attn_layer, "q"): + self.query.weight.set_value(attn_layer.q.weight[:, :, 0, 0]) + self.key.weight.set_value(attn_layer.k.weight[:, :, 0, 0]) + self.value.weight.set_value(attn_layer.v.weight[:, :, 0, 0]) + + self.query.bias.set_value(attn_layer.q.bias) + self.key.bias.set_value(attn_layer.k.bias) + self.value.bias.set_value(attn_layer.v.bias) + + self.proj_attn.weight.set_value(attn_layer.proj_out.weight[:, :, 0, 0]) + self.proj_attn.bias.set_value(attn_layer.proj_out.bias) + elif hasattr(attn_layer, "NIN_0"): + self.query.weight.set_value(attn_layer.NIN_0.W.t()) + self.key.weight.set_value(attn_layer.NIN_1.W.t()) + self.value.weight.set_value(attn_layer.NIN_2.W.t()) + + self.query.bias.set_value(attn_layer.NIN_0.b) + self.key.bias.set_value(attn_layer.NIN_1.b) + self.value.bias.set_value(attn_layer.NIN_2.b) + + self.proj_attn.weight.set_value(attn_layer.NIN_3.W.t()) + self.proj_attn.bias.set_value(attn_layer.NIN_3.b) + + self.group_norm.weight.set_value(attn_layer.GroupNorm_0.weight) + self.group_norm.bias.set_value(attn_layer.GroupNorm_0.bias) + else: + qkv_weight = attn_layer.qkv.weight.reshape( + [self.num_heads, 3 * self.channels // self.num_heads, self.channels]) + qkv_bias = attn_layer.qkv.bias.reshape([self.num_heads, 3 * self.channels // self.num_heads]) + + q_w, k_w, v_w = qkv_weight.split(self.channels // self.num_heads, axis=1) + q_b, k_b, v_b = qkv_bias.split(self.channels // self.num_heads, axis=1) + + self.query.weight.set_value(q_w.reshape([-1, self.channels])) + self.key.weight.set_value(k_w.reshape([-1, self.channels])) + self.value.weight.set_value(v_w.reshape([-1, self.channels])) + + self.query.bias.set_value(q_b.flatten()) + self.key.bias.set_value(k_b.flatten()) + self.value.bias.set_value(v_b.flatten()) + + self.proj_attn.weight.set_value(attn_layer.proj.weight[:, :, 0]) + self.proj_attn.bias.set_value(attn_layer.proj.bias) + + +class SpatialTransformer(nn.Layer): + """ + Transformer block for image-like data. First, project the input (aka embedding) and reshape to b, t, d. Then apply + standard transformer action. 
Finally, reshape to image + """ + + def __init__(self, in_channels, n_heads, d_head, depth=1, dropout=0.0, context_dim=None): + super().__init__() + self.n_heads = n_heads + self.d_head = d_head + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, epsilon=1e-6) + + self.proj_in = nn.Conv2D(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + + self.transformer_blocks = nn.LayerList([ + BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim) + for d in range(depth) + ]) + + self.proj_out = nn.Conv2D(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + x = self.proj_in(x) + x = x.transpose([0, 2, 3, 1]).reshape([b, h * w, c]) + for block in self.transformer_blocks: + x = block(x, context=context) + x = x.reshape([b, h, w, c]).transpose([0, 3, 1, 2]) + x = self.proj_out(x) + return x + x_in + + def set_weight(self, layer): + self.norm = layer.norm + self.proj_in = layer.proj_in + self.transformer_blocks = layer.transformer_blocks + self.proj_out = layer.proj_out + + +class BasicTransformerBlock(nn.Layer): + + def __init__(self, dim, n_heads, d_head, dropout=0.0, context_dim=None, gated_ff=True, checkpoint=True): + super().__init__() + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, + dropout=dropout) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention(query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.checkpoint = checkpoint + + def forward(self, x, context=None): + x = self.attn1(self.norm1(x)) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class CrossAttention(nn.Layer): + + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + + self.scale = dim_head**-0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias_attr=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias_attr=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias_attr=False) + + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) + + def reshape_heads_to_batch_dim(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size, seq_len, head_size, dim // head_size]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size * head_size, seq_len, dim // head_size]) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size // head_size, head_size, seq_len, dim]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size // head_size, seq_len, dim * head_size]) + return tensor + + def forward(self, x, context=None, mask=None): + batch_size, sequence_length, dim = x.shape + + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + q = self.reshape_heads_to_batch_dim(q) + k = 
self.reshape_heads_to_batch_dim(k) + v = self.reshape_heads_to_batch_dim(v) + + sim = paddle.einsum("b i d, b j d -> b i j", q * self.scale, k) + + if exists(mask): + mask = mask.reshape([batch_size, -1]) + max_neg_value = -paddle.finfo(sim.dtype).max + mask = mask[:, None, :].repeat(h, 1, 1) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = F.softmax(sim, axis=-1) + + out = paddle.einsum("b i j, b j d -> b i d", attn, v) + out = self.reshape_batch_dim_to_heads(out) + return self.to_out(out) + + +class FeedForward(nn.Layer): + + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +# feedforward +class GEGLU(nn.Layer): + + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, axis=-1) + return x * F.gelu(gate) + + +# TODO(Patrick) - remove once all weights have been converted -> not needed anymore then +class NIN(nn.Layer): + + def __init__(self, in_dim, num_units, init_scale=0.1): + super().__init__() + self.W = self.create_parameter(shape=[in_dim, num_units], default_initializer=nn.initializer.Constant(0.)) + self.b = self.create_parameter(shape=[ + num_units, + ], + is_bias=True, + default_initializer=nn.initializer.Constant(0.)) + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# the main attention block that is used for all models +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=None, + num_groups=32, + encoder_channels=None, + overwrite_qkv=False, + overwrite_linear=False, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + if num_head_channels is None: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + self.qkv = nn.Conv1D(channels, channels * 3, 1) + self.n_heads = self.num_heads + self.rescale_output_factor = rescale_output_factor + + if encoder_channels is not None: + self.encoder_kv = nn.Conv1D(encoder_channels, channels * 2, 1) + + self.proj = nn.Conv1D(channels, channels, 1) + + self.overwrite_qkv = overwrite_qkv + self.overwrite_linear = overwrite_linear + + if overwrite_qkv: + in_channels = channels + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.q = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + elif self.overwrite_linear: + num_groups = min(channels // 4, 32) + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.NIN_0 = NIN(channels, channels) + self.NIN_1 = NIN(channels, channels) + self.NIN_2 = NIN(channels, channels) + self.NIN_3 = NIN(channels, channels) + + self.GroupNorm_0 = nn.GroupNorm(num_groups=num_groups, num_channels=channels, epsilon=1e-6) + else: + self.proj_out = nn.Conv1D(channels, channels, 1) + self.set_weights(self) + + self.is_overwritten = False + + def set_weights(self, layer): + if self.overwrite_qkv: + qkv_weight = paddle.concat([layer.q.weight, layer.k.weight, layer.v.weight], axis=0)[:, :, :, 0] + qkv_bias = paddle.concat([layer.q.bias, layer.k.bias, layer.v.bias], axis=0) + + self.qkv.weight.set_value(qkv_weight) + self.qkv.bias.set_value(qkv_bias) + + proj_out = nn.Conv1D(self.channels, self.channels, 1) + proj_out.weight.set_value(layer.proj_out.weight[:, :, :, 0]) + proj_out.bias.set_value(layer.proj_out.bias) + + self.proj = proj_out + elif self.overwrite_linear: + self.qkv.weight.set_value( + paddle.concat([self.NIN_0.W.t(), self.NIN_1.W.t(), self.NIN_2.W.t()], axis=0)[:, :, None]) + self.qkv.bias.set_value(paddle.concat([self.NIN_0.b, self.NIN_1.b, self.NIN_2.b], axis=0)) + + self.proj.weight.set_value(self.NIN_3.W.t()[:, :, None]) + self.proj.bias.set_value(self.NIN_3.b) + + self.norm.weight.set_value(self.GroupNorm_0.weight) + self.norm.bias.set_value(self.GroupNorm_0.bias) + else: + self.proj.weight.set_value(self.proj_out.weight) + self.proj.bias.set_value(self.proj_out.bias) + + def forward(self, x, encoder_out=None): + if not self.is_overwritten and (self.overwrite_qkv or self.overwrite_linear): + self.set_weights(self) + self.is_overwritten = True + + b, c, *spatial = x.shape + hid_states = self.norm(x).reshape([b, c, -1]) + + qkv = self.qkv(hid_states) + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape([bs * self.n_heads, ch * 3, length]).split(ch, axis=1) + + if encoder_out is not None: + 
encoder_kv = self.encoder_kv(encoder_out) + assert encoder_kv.shape[1] == self.n_heads * ch * 2 + ek, ev = encoder_kv.reshape([bs * self.n_heads, ch * 2, -1]).split(ch, axis=1) + k = paddle.concat([ek, k], axis=-1) + v = paddle.concat([ev, v], axis=-1) + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = F.softmax(weight.astype("float32"), axis=-1).astype(weight.dtype) + + a = paddle.einsum("bts,bcs->bct", weight, v) + h = a.reshape([bs, -1, length]) + + h = self.proj(h) + h = h.reshape([b, c, *spatial]) + + result = x + h + + result = result / self.rescale_output_factor + + return result diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/embeddings.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/embeddings.py new file mode 100644 index 000000000..3e826193b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/embeddings.py @@ -0,0 +1,116 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def get_timestep_embedding(timesteps, + embedding_dim, + flip_sin_to_cos=False, + downscale_freq_shift=1, + scale=1, + max_period=10000): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the + embeddings. :return: an [N x dim] Tensor of positional embeddings. 
+ """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * paddle.arange(start=0, end=half_dim, dtype="float32") + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = paddle.exp(exponent) + emb = timesteps[:, None].astype("float32") * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = paddle.concat([paddle.sin(emb), paddle.cos(emb)], axis=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = paddle.concat([emb[:, half_dim:], emb[:, :half_dim]], axis=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = paddle.concat(emb, paddle.zeros([emb.shape[0], 1]), axis=-1) + return emb + + +class TimestepEmbedding(nn.Layer): + + def __init__(self, channel, time_embed_dim, act_fn="silu"): + super().__init__() + + self.linear_1 = nn.Linear(channel, time_embed_dim) + self.act = None + if act_fn == "silu": + self.act = nn.Silu() + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) + + def forward(self, sample): + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + return sample + + +class Timesteps(nn.Layer): + + def __init__(self, num_channels, flip_sin_to_cos, downscale_freq_shift): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + ) + return t_emb + + +class GaussianFourierProjection(nn.Layer): + """Gaussian Fourier embeddings for noise levels.""" + + def __init__(self, embedding_size=256, scale=1.0): + super().__init__() + self.register_buffer("weight", paddle.randn((embedding_size, )) * scale) + + # to delete later + self.register_buffer("W", paddle.randn((embedding_size, )) * scale) + + self.weight = self.W + + def forward(self, x): + x = paddle.log(x) + x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi + out = paddle.concat([paddle.sin(x_proj), paddle.cos(x_proj)], axis=-1) + return out diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/resnet.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/resnet.py new file mode 100644 index 000000000..944bc11cd --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/resnet.py @@ -0,0 +1,515 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def pad_new(x, pad, mode="constant", value=0): + new_pad = [] + for _ in range(x.ndim * 2 - len(pad)): + new_pad.append(0) + ndim = list(range(x.ndim - 1, 0, -1)) + axes_start = {} + for i, _pad in enumerate(pad): + if _pad < 0: + new_pad.append(0) + zhengshu, yushu = divmod(i, 2) + if yushu == 0: + axes_start[ndim[zhengshu]] = -_pad + else: + new_pad.append(_pad) + + padded = paddle.nn.functional.pad(x, new_pad, mode=mode, value=value) + padded_shape = paddle.shape(padded) + axes = [] + starts = [] + ends = [] + for k, v in axes_start.items(): + axes.append(k) + starts.append(v) + ends.append(padded_shape[k]) + assert v < padded_shape[k] + + if axes: + return padded.slice(axes=axes, starts=starts, ends=ends) + else: + return padded + + +class Upsample2D(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv_transpose: + conv = nn.Conv2DTranspose(channels, self.out_channels, 4, 2, 1) + elif use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, padding=1) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.conv = conv + else: + self.Conv2d_0 = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(x) + + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if self.use_conv: + if self.name == "conv": + x = self.conv(x) + else: + x = self.Conv2d_0(x) + + return x + + +class Downsample2D(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. 
+ """ + + def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2D(kernel_size=stride, stride=stride) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.Conv2d_0 = conv + self.conv = conv + elif name == "Conv2d_0": + self.conv = conv + else: + self.conv = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + x = pad_new(x, pad, mode="constant", value=0) + + assert x.shape[1] == self.channels + x = self.conv(x) + + return x + + +class FirUpsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.use_conv = use_conv + self.fir_kernel = fir_kernel + self.out_channels = out_channels + + def _upsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `upsample_2d()` followed by `Conv2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, + outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same datatype as + `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Setup filter kernel. + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + + if self.use_conv: + convH = w.shape[2] + convW = w.shape[3] + inC = w.shape[1] + + p = (k.shape[0] - factor) - (convW - 1) + + stride = (factor, factor) + # Determine data dimensions. + stride = [1, 1, factor, factor] + output_shape = ((x.shape[2] - 1) * factor + convH, (x.shape[3] - 1) * factor + convW) + output_padding = ( + output_shape[0] - (x.shape[2] - 1) * stride[0] - convH, + output_shape[1] - (x.shape[3] - 1) * stride[1] - convW, + ) + assert output_padding[0] >= 0 and output_padding[1] >= 0 + inC = w.shape[1] + num_groups = x.shape[1] // inC + + # Transpose weights. 
+ w = paddle.reshape(w, (num_groups, -1, inC, convH, convW)) + w = w[..., ::-1, ::-1].transpose([0, 2, 1, 3, 4]) + w = paddle.reshape(w, (num_groups * inC, -1, convH, convW)) + + x = F.conv2d_transpose(x, w, stride=stride, output_padding=output_padding, padding=0) + + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2 + factor - 1, p // 2 + 1)) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + h = self._upsample_2d(x, self.Conv2d_0.weight, k=self.fir_kernel) + h = h + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + h = self._upsample_2d(x, k=self.fir_kernel, factor=2) + + return h + + +class FirDownsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.fir_kernel = fir_kernel + self.use_conv = use_conv + self.out_channels = out_channels + + def _downsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `Conv2d()` followed by `downsample_2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. w: Weight tensor of the shape `[filterH, + filterW, inChannels, outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // + numGroups`. k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * + factor`, which corresponds to average pooling. factor: Integer downsampling factor (default: 2). gain: + Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or `[N, H // factor, W // factor, C]`, and same + datatype as `x`. 
+ """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + + if self.use_conv: + _, _, convH, convW = w.shape + p = (k.shape[0] - factor) + (convW - 1) + s = [factor, factor] + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2, p // 2)) + x = F.conv2d(x, w, stride=s, padding=0) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + x = self._downsample_2d(x, w=self.Conv2d_0.weight, k=self.fir_kernel) + x = x + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + x = self._downsample_2d(x, k=self.fir_kernel, factor=2) + + return x + + +class ResnetBlock(nn.Layer): + + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity="swish", + time_embedding_norm="default", + kernel=None, + output_scale_factor=1.0, + use_nin_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = nn.GroupNorm(num_groups=groups, num_channels=in_channels, epsilon=eps) + + self.conv1 = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = nn.GroupNorm(num_groups=groups_out, num_channels=out_channels, epsilon=eps) + self.dropout = nn.Dropout(dropout) + self.conv2 = nn.Conv2D(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.Silu() + + self.upsample = self.downsample = None + if self.up: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.upsample = lambda x: upsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.upsample = partial(F.interpolate, scale_factor=2.0, mode="nearest") + else: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.downsample = lambda x: downsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.downsample = partial(F.avg_pool2d, kernel_size=2, stride=2) + else: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_nin_shortcut = self.in_channels != self.out_channels if use_nin_shortcut is None else use_nin_shortcut + + self.conv_shortcut = None + if self.use_nin_shortcut: + self.conv_shortcut = nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, temb, hey=False): + h = x + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm1(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + if self.upsample is not None: + x = 
self.upsample(x) + h = self.upsample(h) + elif self.downsample is not None: + x = self.downsample(x) + h = self.downsample(h) + + h = self.conv1(h) + + if temb is not None: + temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None] + h = h + temb + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm2(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + h = self.dropout(h) + h = self.conv2(h) + + if self.conv_shortcut is not None: + x = self.conv_shortcut(x) + + out = (x + h) / self.output_scale_factor + + return out + + +class Mish(nn.Layer): + + def forward(self, x): + return x * F.tanh(F.softplus(x)) + + +def upsample_2d(x, k=None, factor=2, gain=1): + r"""Upsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given + filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified + `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is a: + multiple of the upsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` + """ + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + +def downsample_2d(x, k=None, factor=2, gain=1): + r"""Downsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and downsamples each image with the + given filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the + specified `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its + shape is a multiple of the downsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). 
+ + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + +def upfirdn2d_native(input, kernel, up=1, down=1, pad=(0, 0)): + up_x = up_y = up + down_x = down_y = down + pad_x0 = pad_y0 = pad[0] + pad_x1 = pad_y1 = pad[1] + + _, channel, in_h, in_w = input.shape + input = input.reshape([-1, in_h, in_w, 1]) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.reshape([-1, in_h, 1, in_w, 1, minor]) + # TODO + out = pad_new(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.reshape([-1, in_h * up_y, in_w * up_x, minor]) + + out = pad_new(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[:, max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.transpose([0, 3, 1, 2]) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = paddle.flip(kernel, [0, 1]).reshape([1, 1, kernel_h, kernel_w]) + out = F.conv2d(out, w) + out = out.reshape( + [-1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1]) + out = out.transpose([0, 2, 3, 1]) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.reshape([-1, channel, out_h, out_w]) diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d.py new file mode 100644 index 000000000..11316a819 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
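To make the FIR resampling helpers above concrete: with the default `[1] * factor` kernel, `upsample_2d` corresponds to nearest-neighbor upsampling and `downsample_2d` to average pooling, as their docstrings state. A small sketch of calling them directly (shapes are illustrative; the import path assumes this patch's layout):

import paddle
from diffusers.models.resnet import downsample_2d, upsample_2d  # hypothetical install path

x = paddle.randn([1, 4, 32, 32])     # NCHW feature map
up = upsample_2d(x, factor=2)        # -> [1, 4, 64, 64]
down = downsample_2d(up, factor=2)   # -> [1, 4, 32, 32]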
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import GaussianFourierProjection +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class UNet2DModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=None, + in_channels=3, + out_channels=3, + center_input_sample=False, + time_embedding_type="positional", + freq_shift=0, + flip_sin_to_cos=True, + down_block_types=("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D"), + block_out_channels=(224, 448, 672, 896), + layers_per_block=2, + mid_block_scale_factor=1, + downsample_padding=1, + act_fn="silu", + attention_head_dim=8, + norm_num_groups=32, + norm_eps=1e-5, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + if time_embedding_type == "fourier": + self.time_proj = GaussianFourierProjection(embedding_size=block_out_channels[0], scale=16) + timestep_input_dim = 2 * block_out_channels[0] + elif time_embedding_type == "positional": + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + ) + 
self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 32) + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=num_groups_out, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, sample: paddle.Tensor, timestep: Union[paddle.Tensor, float, int]) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + skip_sample = sample + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "skip_conv"): + sample, res_samples, skip_sample = downsample_block(hidden_states=sample, + temb=emb, + skip_sample=skip_sample) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb) + + # 5. up + skip_sample = None + for upsample_block in self.up_blocks: + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "skip_conv"): + sample, skip_sample = upsample_block(sample, res_samples, emb, skip_sample) + else: + sample = upsample_block(sample, res_samples, emb) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + if skip_sample is not None: + sample += skip_sample + + if self.config.time_embedding_type == "fourier": + timesteps = timesteps.reshape((sample.shape[0], *([1] * len(sample.shape[1:])))) + sample = sample / timesteps + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d_condition.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d_condition.py new file mode 100644 index 000000000..897491b2f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_2d_condition.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
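The unconditional `UNet2DModel` above returns a dict keyed by "sample". A rough smoke-test sketch is below; the small channel counts are illustrative only, not the defaults of any released checkpoint.

import paddle
from diffusers.models.unet_2d import UNet2DModel  # hypothetical install path

unet = UNet2DModel(sample_size=32,
                   in_channels=3,
                   out_channels=3,
                   block_out_channels=(32, 64, 64, 64),
                   layers_per_block=1)
x = paddle.randn([1, 3, 32, 32])
noise_pred = unet(x, timestep=10)["sample"]  # same shape as x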
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2DCrossAttn + + +class UNet2DConditionModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = 
nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=norm_num_groups, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward( + self, + sample: paddle.Tensor, + timestep: Union[paddle.Tensor, float, int], + encoder_hidden_states: paddle.Tensor, + ) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + + if hasattr(downsample_block, "attentions") and downsample_block.attentions is not None: + sample, res_samples = downsample_block(hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb, encoder_hidden_states=encoder_hidden_states) + + # 5. up + for upsample_block in self.up_blocks: + + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "attentions") and upsample_block.attentions is not None: + sample = upsample_block( + hidden_states=sample, + temb=emb, + res_hidden_states_tuple=res_samples, + encoder_hidden_states=encoder_hidden_states, + ) + else: + sample = upsample_block(hidden_states=sample, temb=emb, res_hidden_states_tuple=res_samples) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_blocks.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_blocks.py new file mode 100644 index 000000000..684a2a43d --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/unet_blocks.py @@ -0,0 +1,1428 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
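+# Note: factory helpers and building blocks shared by the UNet and VAE models in this
+# package: `get_down_block`/`get_up_block` map a block-type name to its class, and each
+# block combines ResnetBlock layers with optional self-attention (AttentionBlockNew) or
+# cross-attention (SpatialTransformer) plus down-/up-sampling layers.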
+import numpy as np +import paddle +import paddle.nn as nn + +from .attention import AttentionBlockNew +from .attention import SpatialTransformer +from .resnet import Downsample2D +from .resnet import FirDownsample2D +from .resnet import FirUpsample2D +from .resnet import ResnetBlock +from .resnet import Upsample2D + + +def get_down_block( + down_block_type, + num_layers, + in_channels, + out_channels, + temb_channels, + add_downsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, + downsample_padding=None, +): + down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type + if down_block_type == "DownBlock2D": + return DownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnDownBlock2D": + return AttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "CrossAttnDownBlock2D": + if cross_attention_dim is None: + raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "SkipDownBlock2D": + return SkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnSkipDownBlock2D": + return AttnSkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "DownEncoderBlock2D": + return DownEncoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + + +def get_up_block( + up_block_type, + num_layers, + in_channels, + out_channels, + prev_output_channel, + temb_channels, + add_upsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, +): + up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type + if up_block_type == "UpBlock2D": + return UpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "CrossAttnUpBlock2D": + if cross_attention_dim is None: 
+ raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "AttnUpBlock2D": + return AttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "SkipUpBlock2D": + return SkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "AttnSkipUpBlock2D": + return AttnSkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "UpDecoderBlock2D": + return UpDecoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + raise ValueError(f"{up_block_type} does not exist.") + + +class UNetMidBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + AttentionBlockNew( + in_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in 
zip(self.attentions, self.resnets[1:]): + if self.attention_type == "default": + hidden_states = attn(hidden_states) + else: + hidden_states = attn(hidden_states, encoder_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class UNetMidBlock2DCrossAttn(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + cross_attention_dim=1280, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + SpatialTransformer( + in_channels, + attn_num_head_channels, + in_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states, encoder_hidden_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class AttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, 
+ out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class CrossAttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + 
Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnDownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.downsamplers is not None: + for downsampler 
in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnSkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class SkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, 
+ )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class AttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attention_type="default", + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class CrossAttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + 
resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, encoder_hidden_states=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet in self.resnets: + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if 
self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnUpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnSkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + upsample_padding=1, + add_upsample=True, + ): + 
super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(resnet_in_channels + res_skip_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + hidden_states = self.attentions[0](hidden_states) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample + + +class SkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_upsample=True, + upsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + 
groups=min((resnet_in_channels + res_skip_channels) // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/models/vae.py b/modules/image/text_to_image/stable_diffusion/diffusers/models/vae.py new file mode 100644 index 000000000..59e35b0fb --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/models/vae.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
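+# Note: VAE components. `Encoder`/`Decoder` reuse the UNet blocks without time embeddings,
+# `VectorQuantizer` implements the VQ codebook lookup used by `VQModel`, and
+# `AutoencoderKL` pairs an encoder/decoder with a DiagonalGaussianDistribution latent.
+# Minimal usage sketch (assuming an `AutoencoderKL` instance `vae`):
+#     z = vae.encode(img).sample()      # draw a latent from the posterior
+#     img_rec = vae.decode(z)           # map the latent back to image space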
+import numpy as np +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class Encoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + double_z=True, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.down_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=self.layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=not is_final_block, + resnet_eps=1e-6, + downsample_padding=0, + resnet_act_fn=act_fn, + attn_num_head_channels=None, + temb_channels=None, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[-1], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = nn.Conv2D(block_out_channels[-1], conv_out_channels, 3, padding=1) + + def forward(self, x): + sample = x + sample = self.conv_in(sample) + + # down + for down_block in self.down_blocks: + sample = down_block(sample) + + # middle + sample = self.mid_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class Decoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + prev_output_channel=None, + add_upsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + 
attn_num_head_channels=None, + temb_channels=None, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, z): + sample = z + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample) + + # up + for up_block in self.up_blocks: + sample = up_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class VectorQuantizer(nn.Layer): + """ + Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix + multiplications and allows for post-hoc remapping of indices. + """ + + # NOTE: due to a bug the beta term was applied to the wrong term. for + # backwards compatibility we use the buggy version by default, but you can + # specify legacy=False to fix it. + def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True): + super().__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + self.legacy = legacy + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + self.remap = remap + if self.remap is not None: + self.register_buffer("used", paddle.to_tensor(np.load(self.remap))) + self.re_embed = self.used.shape[0] + self.unknown_index = unknown_index # "random" or "extra" or integer + if self.unknown_index == "extra": + self.unknown_index = self.re_embed + self.re_embed = self.re_embed + 1 + print(f"Remapping {self.n_e} indices to {self.re_embed} indices. 
" + f"Using {self.unknown_index} for unknown indices.") + else: + self.re_embed = n_e + + self.sane_index_shape = sane_index_shape + + def remap_to_used(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + match = (inds[:, :, None] == used[None, None, ...]).astype("int64") + new = match.argmax(-1) + unknown = match.sum(2) < 1 + if self.unknown_index == "random": + new[unknown] = paddle.randint(0, self.re_embed, shape=new[unknown].shape) + else: + new[unknown] = self.unknown_index + return new.reshape(ishape) + + def unmap_to_all(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + if self.re_embed > self.used.shape[0]: # extra token + inds[inds >= self.used.shape[0]] = 0 # simply set to zero + back = paddle.gather(used[None, :][inds.shape[0] * [0], :], inds, axis=1) + return back.reshape(ishape) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.transpose([0, 2, 3, 1]) + z_flattened = z.reshape([-1, self.e_dim]) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = (paddle.sum(z_flattened**2, axis=1, keepdim=True) + paddle.sum(self.embedding.weight**2, axis=1) - + 2 * paddle.einsum("bd,dn->bn", z_flattened, self.embedding.weight.t())) + + min_encoding_indices = paddle.argmin(d, axis=1) + z_q = self.embedding(min_encoding_indices).reshape(z.shape) + perplexity = None + min_encodings = None + + # compute loss for embedding + if not self.legacy: + loss = self.beta * paddle.mean((z_q.detach() - z)**2) + paddle.mean((z_q - z.detach())**2) + else: + loss = paddle.mean((z_q.detach() - z)**2) + self.beta * paddle.mean((z_q - z.detach())**2) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + if self.remap is not None: + min_encoding_indices = min_encoding_indices.reshape([z.shape[0], -1]) # add batch axis + min_encoding_indices = self.remap_to_used(min_encoding_indices) + min_encoding_indices = min_encoding_indices.reshape([-1, 1]) # flatten + + if self.sane_index_shape: + min_encoding_indices = min_encoding_indices.reshape([z_q.shape[0], z_q.shape[2], z_q.shape[3]]) + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + if self.remap is not None: + indices = indices.reshape([shape[0], -1]) # add batch axis + indices = self.unmap_to_all(indices) + indices = indices.flatten() # flatten again + + # get quantized latent vectors + z_q = self.embedding(indices) + + if shape is not None: + z_q = z_q.reshape(shape) + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + return z_q + + +class DiagonalGaussianDistribution(object): + + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = paddle.chunk(parameters, 2, axis=1) + self.logvar = paddle.clip(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = paddle.exp(0.5 * self.logvar) + self.var = paddle.exp(self.logvar) + if self.deterministic: + self.var = self.std = paddle.zeros_like(self.mean) + + def sample(self): + x = self.mean + self.std * paddle.randn(self.mean.shape) + return x + + def kl(self, other=None): + if self.deterministic: + return paddle.to_tensor([0.0]) + else: + if other is None: + return 0.5 * 
paddle.sum(paddle.pow(self.mean, 2) + self.var - 1.0 - self.logvar, axis=[1, 2, 3]) + else: + return 0.5 * paddle.sum( + paddle.pow(self.mean - other.mean, 2) / other.var + self.var / other.var - 1.0 - self.logvar + + other.logvar, + axis=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return paddle.to_tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * paddle.sum(logtwopi + self.logvar + paddle.pow(sample - self.mean, 2) / self.var, axis=dims) + + def mode(self): + return self.mean + + +class VQModel(ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=1, + act_fn="silu", + latent_channels=3, + sample_size=32, + num_vq_embeddings=256, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=False, + ) + + self.quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + self.quantize = VectorQuantizer(num_vq_embeddings, + latent_channels, + beta=0.25, + remap=None, + sane_index_shape=False) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def forward(self, sample): + x = sample + h = self.encode(x) + dec = self.decode(h) + return dec + + +class AutoencoderKL(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=True, + ) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + self.quant_conv = nn.Conv2D(2 * latent_channels, 2 * latent_channels, 1) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, 
sample, sample_posterior=False): + x = sample + posterior = self.encode(x) + if sample_posterior: + z = posterior.sample() + else: + z = posterior.mode() + dec = self.decode(z) + return dec diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/README.md b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/README.md new file mode 100644 index 000000000..40f50f232 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/README.md @@ -0,0 +1,18 @@ +# Schedulers + +- Schedulers are the algorithms used to run diffusion models at inference time as well as for training. They include the noise schedules and define algorithm-specific diffusion steps. +- Schedulers can be used interchangeably between diffusion models at inference time to find the preferred trade-off between speed and generation quality. +- Schedulers are available in numpy, but can easily be transformed into Paddle tensors. + +## API + +- Schedulers should provide one or more `def step(...)` functions that should be called iteratively to unroll the diffusion loop during the forward pass. +- Schedulers should be framework-agnostic, but provide simple functionality to convert the scheduler into a specific framework, such as Paddle, with a `set_format(...)` method. + +## Examples + +- The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py). +- The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py). +- The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py). diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/__init__.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/__init__.py new file mode 100644 index 000000000..cebc3e618 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/__init__.py @@ -0,0 +1,24 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
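Editorial note: the API section of the README above describes schedules that are built as numpy arrays and then converted to a framework-specific format via `set_format(...)`. A minimal sketch of that conversion, assuming the `stable_diffusion.diffusers` package layout used by `module.py` later in this patch:

```python
# Sketch only: schedules start as numpy arrays; set_format("pd") converts every
# stored ndarray to a paddle.Tensor (see SchedulerMixin in scheduling_utils.py below).
from stable_diffusion.diffusers import DDIMScheduler

scheduler = DDIMScheduler(tensor_format="np")   # keep schedules as numpy arrays
print(type(scheduler.alphas_cumprod))           # numpy.ndarray

scheduler.set_format(tensor_format="pd")        # convert stored arrays to Paddle tensors
print(type(scheduler.alphas_cumprod))           # now a paddle.Tensor
```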
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .scheduling_ddim import DDIMScheduler +from .scheduling_ddpm import DDPMScheduler +from .scheduling_karras_ve import KarrasVeScheduler +from .scheduling_lms_discrete import LMSDiscreteScheduler +from .scheduling_pndm import PNDMScheduler +from .scheduling_sde_ve import ScoreSdeVeScheduler +from .scheduling_sde_vp import ScoreSdeVpScheduler +from .scheduling_utils import SchedulerMixin diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddim.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddim.py new file mode 100644 index 000000000..ebe362d99 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddim.py @@ -0,0 +1,182 @@ +# Copyright 2022 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pypaddle_diffusion +# and https://github.com/hojonathanho/diffusion +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDIMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + clip_sample=True, + set_alpha_to_one=True, + tensor_format="pd", + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this paratemer simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = np.array(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def _get_variance(self, timestep, prev_timestep): + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.timesteps += offset + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator=None, + ): + # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointingc to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. 
compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + + # 4. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance**(0.5) + + if use_clipped_model_output: + # the model_output is always re-derived from the clipped x_0 in Glide + model_output = (sample - alpha_prod_t**(0.5) * pred_original_sample) / beta_prod_t**(0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2)**(0.5) * model_output + + # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + prev_sample = alpha_prod_t_prev**(0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + noise = paddle.randn(model_output.shape) + variance = self._get_variance(timestep, prev_timestep)**(0.5) * eta * noise + + if not paddle.is_tensor(model_output): + variance = variance.numpy() + + prev_sample = prev_sample + variance + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddpm.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddpm.py new file mode 100644 index 000000000..34551b2ad --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_ddpm.py @@ -0,0 +1,191 @@ +# Copyright 2022 UC Berkely Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
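Editorial note: to make the roles of `set_timesteps` and `step` in the DDIM scheduler above concrete, here is a hedged sketch of the reverse-diffusion loop it is meant to drive. `dummy_unet` is a placeholder for a real noise-prediction network; the latent shape follows the 4-channel, 64x64 latents configured in `module.py` later in this patch.

```python
import paddle

from stable_diffusion.diffusers import DDIMScheduler


def dummy_unet(sample, t):
    # placeholder noise predictor; a real UNet would be called here
    return paddle.randn(sample.shape)


scheduler = DDIMScheduler()                 # defaults: 1000 training steps, linear betas
scheduler.set_timesteps(num_inference_steps=50)

latents = paddle.randn([1, 4, 64, 64])      # start from pure Gaussian noise
for t in scheduler.timesteps:               # 980, 960, ..., 0
    noise_pred = dummy_unet(latents, t)
    # eta=0.0 gives the deterministic DDIM update of formula (12)
    latents = scheduler.step(noise_pred, int(t), latents, eta=0.0)["prev_sample"]
```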
+# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDPMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + variance_type="fixed_small", + clip_sample=True, + tensor_format="pd", + ): + + if trained_betas is not None: + self.betas = np.asarray(trained_betas) + elif beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
+ self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + self.one = np.array(1.0) + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + self.variance_type = variance_type + + def set_timesteps(self, num_inference_steps): + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.set_format(tensor_format=self.tensor_format) + + def _get_variance(self, t, predicted_variance=None, variance_type=None): + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probs added for training stability + if variance_type == "fixed_small": + variance = self.clip(variance, min_value=1e-20) + # for rl-diffuser https://arxiv.org/abs/2205.09991 + elif variance_type == "fixed_small_log": + variance = self.log(self.clip(variance, min_value=1e-20)) + elif variance_type == "fixed_large": + variance = self.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = self.log(self.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = self.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + predict_epsilon=True, + generator=None, + ): + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + model_output, predicted_variance = paddle.split(model_output, sample.shape[1], axis=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + if predict_epsilon: + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + else: + pred_original_sample = model_output + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 4. 
Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample_coeff = (alpha_prod_t_prev**(0.5) * self.betas[t]) / beta_prod_t + current_sample_coeff = self.alphas[t]**(0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + noise = self.randn_like(model_output) + variance = (self._get_variance(t, predicted_variance=predicted_variance)**0.5) * noise + + pred_prev_sample = pred_prev_sample + variance + + return {"prev_sample": pred_prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_karras_ve.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_karras_ve.py new file mode 100644 index 000000000..36827564e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_karras_ve.py @@ -0,0 +1,124 @@ +# Copyright 2022 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class KarrasVeScheduler(SchedulerMixin, ConfigMixin): + """ + Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and + the VE column of Table 1 from [1] for reference. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic + differential equations." https://arxiv.org/abs/2011.13456 + """ + + @register_to_config + def __init__( + self, + sigma_min=0.02, + sigma_max=100, + s_noise=1.007, + s_churn=80, + s_min=0.05, + s_max=50, + tensor_format="pd", + ): + """ + For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of + Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the + optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper. 
+ + Args: + sigma_min (`float`): minimum noise magnitude + sigma_max (`float`): maximum noise magnitude + s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling. + A reasonable range is [1.000, 1.011]. + s_churn (`float`): the parameter controlling the overall amount of stochasticity. + A reasonable range is [0, 100]. + s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity). + A reasonable range is [0, 10]. + s_max (`float`): the end value of the sigma range where we add noise. + A reasonable range is [0.2, 80]. + """ + # setable values + self.num_inference_steps = None + self.timesteps = None + self.schedule = None # sigma(t_i) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.schedule = [(self.sigma_max * (self.sigma_min**2 / self.sigma_max**2)**(i / (num_inference_steps - 1))) + for i in self.timesteps] + self.schedule = np.array(self.schedule, dtype=np.float32) + + self.set_format(tensor_format=self.tensor_format) + + def add_noise_to_input(self, sample, sigma, generator=None): + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a + higher noise level sigma_hat = sigma_i + gamma_i*sigma_i. + """ + if self.s_min <= sigma <= self.s_max: + gamma = min(self.s_churn / self.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + eps = self.s_noise * paddle.randn(sample.shape) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2)**0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + return {"prev_sample": sample_prev, "derivative": derivative} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + sample_prev: Union[paddle.Tensor, np.ndarray], + derivative: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + return {"prev_sample": sample_prev, "derivative": derivative_corr} + + def add_noise(self, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_lms_discrete.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_lms_discrete.py new file mode 100644 index 000000000..2ed63cc2c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_lms_discrete.py @@ -0,0 +1,133 @@ +# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
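Editorial note: the Karras VE scheduler above splits one sampling iteration into three calls: `add_noise_to_input` (the "churn" step), an Euler `step`, and a second-order `step_correct`. A hedged sketch of how they compose (roughly Algorithm 2 of the paper), with a dummy model standing in for a real denoiser and without the pipeline-specific input scaling:

```python
import paddle

from stable_diffusion.diffusers.schedulers import KarrasVeScheduler


def dummy_model(sample, sigma):
    # placeholder for a real score/denoising network
    return paddle.randn(sample.shape)


scheduler = KarrasVeScheduler()
scheduler.set_timesteps(num_inference_steps=50)

sample = paddle.randn([1, 3, 64, 64]) * scheduler.sigma_max
for t in scheduler.timesteps:                       # 49, 48, ..., 0
    t = int(t)
    sigma = float(scheduler.schedule[t])            # sigma decreases from sigma_max
    sigma_prev = float(scheduler.schedule[t - 1]) if t > 0 else 0.0

    sample_hat, sigma_hat = scheduler.add_noise_to_input(sample, sigma)   # churn step
    output = scheduler.step(dummy_model(sample_hat, sigma_hat), sigma_hat, sigma_prev, sample_hat)
    if sigma_prev != 0:                              # no correction on the final step
        output = scheduler.step_correct(dummy_model(output["prev_sample"], sigma_prev),
                                        sigma_hat, sigma_prev, sample_hat,
                                        output["prev_sample"], output["derivative"])
    sample = output["prev_sample"]
```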
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle +from scipy import integrate + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + tensor_format="pd", + ): + """ + Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by + Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181 + """ + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5 + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self.derivatives = [] + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def get_lms_coefficient(self, order, t, current_order): + """ + Compute a linear multistep coefficient + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.linspace(self.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) + + low_idx = np.floor(self.timesteps).astype(int) + high_idx = np.ceil(self.timesteps).astype(int) + frac = np.mod(self.timesteps, 1.0) + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5) + sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] + self.sigmas = np.concatenate([sigmas, [0.0]]) + + self.derivatives = [] + + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + order: int = 4, + ): + sigma = self.sigmas[timestep] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + pred_original_sample = sample - sigma * model_output + + # 2. 
Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + self.derivatives.append(derivative) + if len(self.derivatives) > order: + self.derivatives.pop(0) + + # 3. Compute linear multistep coefficients + order = min(timestep + 1, order) + lms_coeffs = [self.get_lms_coefficient(order, timestep, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum(coeff * derivative + for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives))) + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + alpha_prod = self.alphas_cumprod[timesteps] + alpha_prod = self.match_shape(alpha_prod, original_samples) + + noisy_samples = (alpha_prod**0.5) * original_samples + ((1 - alpha_prod)**0.5) * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_pndm.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_pndm.py new file mode 100644 index 000000000..12abd9cfe --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_pndm.py @@ -0,0 +1,258 @@ +# Copyright 2022 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class PNDMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + tensor_format="pd", + skip_prk_steps=False, + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.one = np.array(1.0) + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.cur_model_output = 0 + self.counter = 0 + self.cur_sample = None + self.ets = [] + + # setable values + self.num_inference_steps = None + self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self._offset = 0 + self.prk_timesteps = None + self.plms_timesteps = None + self.timesteps = None + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self._timesteps = list( + range(0, self.config.num_train_timesteps, self.config.num_train_timesteps // num_inference_steps)) + self._offset = offset + self._timesteps = [t + self._offset for t in self._timesteps] + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results. 
When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + self.prk_timesteps = [] + self.plms_timesteps = list(reversed(self._timesteps[:-1] + self._timesteps[-2:-1] + self._timesteps[-1:])) + else: + prk_timesteps = np.array(self._timesteps[-self.pndm_order:]).repeat(2) + np.tile( + np.array([0, self.config.num_train_timesteps // num_inference_steps // 2]), self.pndm_order) + self.prk_timesteps = list(reversed(prk_timesteps[:-1].repeat(2)[1:-1])) + self.plms_timesteps = list(reversed(self._timesteps[:-3])) + + self.timesteps = self.prk_timesteps + self.plms_timesteps + + self.ets = [] + self.counter = 0 + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps: + return self.step_prk(model_output=model_output, timestep=timestep, sample=sample) + else: + return self.step_plms(model_output=model_output, timestep=timestep, sample=sample) + + def step_prk( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the + solution to the differential equation. + """ + diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2 + prev_timestep = max(timestep - diff_to_prev, self.prk_timesteps[-1]) + timestep = self.prk_timesteps[self.counter // 4 * 4] + + if self.counter % 4 == 0: + self.cur_model_output += 1 / 6 * model_output + self.ets.append(model_output) + self.cur_sample = sample + elif (self.counter - 1) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 2) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 3) % 4 == 0: + model_output = self.cur_model_output + 1 / 6 * model_output + self.cur_model_output = 0 + + # cur_sample should not be `None` + cur_sample = self.cur_sample if self.cur_sample is not None else sample + + prev_sample = self._get_prev_sample(cur_sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def step_plms( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple + times to approximate the solution. 
+ """ + if not self.config.skip_prk_steps and len(self.ets) < 3: + raise ValueError( + f"{self.__class__} can only be run AFTER scheduler has been run " + "in 'prk' mode for at least 12 iterations " + "See: https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py " + "for more information.") + + prev_timestep = max(timestep - self.config.num_train_timesteps // self.num_inference_steps, 0) + + if self.counter != 1: + self.ets.append(model_output) + else: + prev_timestep = timestep + timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps + + if len(self.ets) == 1 and self.counter == 0: + model_output = model_output + self.cur_sample = sample + elif len(self.ets) == 1 and self.counter == 1: + model_output = (model_output + self.ets[-1]) / 2 + sample = self.cur_sample + self.cur_sample = None + elif len(self.ets) == 2: + model_output = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + model_output = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + model_output = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def _get_prev_sample(self, sample, timestep, timestep_prev, model_output): + # See formula (9) of PNDM paper https://arxiv.org/pdf/2202.09778.pdf + # this function computes x_(t−δ) using the formula of (9) + # Note that x_t needs to be added to both sides of the equation + + # Notation ( -> + # alpha_prod_t -> α_t + # alpha_prod_t_prev -> α_(t−δ) + # beta_prod_t -> (1 - α_t) + # beta_prod_t_prev -> (1 - α_(t−δ)) + # sample -> x_t + # model_output -> e_θ(x_t, t) + # prev_sample -> x_(t−δ) + alpha_prod_t = self.alphas_cumprod[timestep + 1 - self._offset] + alpha_prod_t_prev = self.alphas_cumprod[timestep_prev + 1 - self._offset] + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t)) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t)**(0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev**(0.5) + (alpha_prod_t * beta_prod_t * + alpha_prod_t_prev)**(0.5) + + # full formula (9) + prev_sample = (sample_coeff * sample - + (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff) + + return prev_sample + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_ve.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_ve.py new file mode 100644 index 000000000..92ca23d5b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_ve.py @@ -0,0 +1,172 @@ +# Copyright 
2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pypaddle +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): + """ + The variance exploding stochastic differential equation (SDE) scheduler. + + :param snr: coefficient weighting the step from the model_output sample (from the network) to the random noise. + :param sigma_min: initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the + distribution of the data. + :param sigma_max: :param sampling_eps: the end value of sampling, where timesteps decrease progessively from 1 to + epsilon. :param correct_steps: number of correction steps performed on a produced sample. :param tensor_format: + "np" or "pd" for the expected format of samples passed to the Scheduler. + """ + + @register_to_config + def __init__( + self, + num_train_timesteps=2000, + snr=0.15, + sigma_min=0.01, + sigma_max=1348, + sampling_eps=1e-5, + correct_steps=1, + tensor_format="pd", + ): + # self.sigmas = None + # self.discrete_sigmas = None + # + # # setable values + # self.num_inference_steps = None + self.timesteps = None + + self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, sampling_eps=None): + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.timesteps = np.linspace(1, sampling_eps, num_inference_steps) + elif tensor_format == "pd": + self.timesteps = paddle.linspace(1, sampling_eps, num_inference_steps) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_sigmas(self, num_inference_steps, sigma_min=None, sigma_max=None, sampling_eps=None): + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if self.timesteps is None: + self.set_timesteps(num_inference_steps, sampling_eps) + + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.discrete_sigmas = np.exp(np.linspace(np.log(sigma_min), np.log(sigma_max), num_inference_steps)) + self.sigmas = np.array([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + elif tensor_format == "pd": + self.discrete_sigmas = paddle.exp(paddle.linspace(np.log(sigma_min), np.log(sigma_max), + 
num_inference_steps)) + self.sigmas = paddle.to_tensor([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def get_adjacent_sigma(self, timesteps, t): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.where(timesteps == 0, np.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + elif tensor_format == "pd": + return paddle.where(timesteps == 0, paddle.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_seed(self, seed): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + np.random.seed(seed) + elif tensor_format == "pd": + paddle.seed(seed) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def step_pred( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Predict the sample at the previous timestep by reversing the SDE. + """ + if seed is not None: + self.set_seed(seed) + # TODO(Patrick) non-Pypaddle + + timestep = timestep * paddle.ones(sample.shape[0]) # paddle.repeat_interleave(timestep, sample.shape[0]) + timesteps = (timestep * (len(self.timesteps) - 1)).astype("int64") + + sigma = self.discrete_sigmas[timesteps] + adjacent_sigma = self.get_adjacent_sigma(timesteps, timestep) + drift = self.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2)**0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + drift = drift - diffusion[:, None, None, None]**2 * model_output + + # equation 6: sample noise for the diffusion term of + noise = self.randn_like(sample) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion[:, None, None, None] * noise # add impact of diffusion field g + + return {"prev_sample": prev_sample, "prev_sample_mean": prev_sample_mean} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Correct the predicted sample based on the output model_output of the network. This is often run repeatedly + after making the prediction for the previous timestep. + """ + if seed is not None: + self.set_seed(seed) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. 
of z" + # sample noise for correction + noise = self.randn_like(sample) + + # compute step size from the model_output, the noise, and the snr + grad_norm = self.norm(model_output) + noise_norm = self.norm(noise) + step_size = (self.config.snr * noise_norm / grad_norm)**2 * 2 + step_size = step_size * paddle.ones(sample.shape[0]) + # self.repeat_scalar(step_size, sample.shape[0]) + + # compute corrected sample: model_output term and noise term + prev_sample_mean = sample + step_size[:, None, None, None] * model_output + prev_sample = prev_sample_mean + ((step_size * 2)**0.5)[:, None, None, None] * noise + + return {"prev_sample": prev_sample} + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_vp.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_vp.py new file mode 100644 index 000000000..8ad84c73e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_sde_vp.py @@ -0,0 +1,59 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): + + self.sigmas = None + self.discrete_sigmas = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps): + self.timesteps = paddle.linspace(1, self.config.sampling_eps, num_inference_steps) + + def step_pred(self, score, x, t): + # TODO(Patrick) better comments + non-PyTorch + # postprocess model score + log_mean_coeff = (-0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min) + std = paddle.sqrt(1.0 - paddle.exp(2.0 * log_mean_coeff)) + score = -score / std[:, None, None, None] + + # compute + dt = -1.0 / len(self.timesteps) + + beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) + drift = -0.5 * beta_t[:, None, None, None] * x + diffusion = paddle.sqrt(beta_t) + drift = drift - diffusion[:, None, None, None]**2 * score + x_mean = x + drift * dt + + # add noise + noise = self.randn_like(x) + x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise + + return x, x_mean + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_utils.py b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_utils.py new file mode 100644 index 
000000000..dc3cbde5a --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/diffusers/schedulers/scheduling_utils.py @@ -0,0 +1,102 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +class SchedulerMixin: + + config_name = SCHEDULER_CONFIG_NAME + ignore_for_config = ["tensor_format"] + + def set_format(self, tensor_format="pd"): + self.tensor_format = tensor_format + if tensor_format == "pd": + for key, value in vars(self).items(): + if isinstance(value, np.ndarray): + setattr(self, key, paddle.to_tensor(value)) + + return self + + def clip(self, tensor, min_value=None, max_value=None): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.clip(tensor, min_value, max_value) + elif tensor_format == "pd": + return paddle.clip(tensor, min_value, max_value) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def log(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.log(tensor) + elif tensor_format == "pd": + return paddle.log(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def match_shape(self, values: Union[np.ndarray, paddle.Tensor], broadcast_array: Union[np.ndarray, paddle.Tensor]): + """ + Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. + + Args: + values: an array or tensor of values to extract. + broadcast_array: an array with a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + Returns: + a tensor of shape [batch_size, 1, ...] where the shape has K dims. 
+ """ + + tensor_format = getattr(self, "tensor_format", "pd") + values = values.flatten() + + while len(values.shape) < len(broadcast_array.shape): + values = values[..., None] + + return values + + def norm(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.linalg.norm(tensor) + elif tensor_format == "pd": + return paddle.norm(tensor.reshape([tensor.shape[0], -1]), axis=-1).mean() + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def randn_like(self, tensor, generator=None): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.random.randn(np.shape(tensor)) + elif tensor_format == "pd": + # return paddle.randn_like(tensor) + return paddle.randn(tensor.shape) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def zeros_like(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.zeros_like(tensor) + elif tensor_format == "pd": + return paddle.zeros_like(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") diff --git a/modules/image/text_to_image/stable_diffusion/module.py b/modules/image/text_to_image/stable_diffusion/module.py new file mode 100755 index 000000000..d8ca11b5c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/module.py @@ -0,0 +1,367 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import ast +import os +import random +import sys +from functools import partial +from typing import List +from typing import Optional + +import numpy as np +import paddle +from docarray import Document +from docarray import DocumentArray +from IPython import display +from PIL import Image +from stable_diffusion.clip.clip.utils import build_model +from stable_diffusion.clip.clip.utils import tokenize +from stable_diffusion.diffusers import AutoencoderKL +from stable_diffusion.diffusers import DDIMScheduler +from stable_diffusion.diffusers import LMSDiscreteScheduler +from stable_diffusion.diffusers import PNDMScheduler +from stable_diffusion.diffusers import UNet2DConditionModel +from tqdm.auto import tqdm + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="stable_diffusion", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class StableDiffusion: + + def __init__(self): + self.vae = AutoencoderKL(in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", + "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", + "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512) + + self.unet = UNet2DConditionModel(sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8) + + unet_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-unet.pdparams') + vae_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-vae.pdparams') + self.unet.set_dict(paddle.load(unet_path)) + self.vae.set_dict(paddle.load(vae_path)) + for parameter in self.unet.parameters(): + parameter.stop_gradient = True + self.unet.eval() + for parameter in self.vae.parameters(): + parameter.stop_gradient = True + self.vae.eval() + + self.text_encoder = build_model() + for parameter in self.text_encoder.parameters(): + parameter.stop_gradient = True + self.scheduler = PNDMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + skip_prk_steps=True) + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [512, 512], + batch_size: Optional[int] = 1, + num_inference_steps=50, + guidance_scale=7.5, + enable_fp16=False, + seed=None, + display_rate=5, + use_gpu=True, + output_dir: Optional[str] = 'stable_diffusion_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. 
These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using SD, to get a feel for how text gets translated into images by generative tools. These other apps use different technologies, but many of the same principles apply.
+        :param style: Image style, such as oil paintings; if specified, it will be appended to the prompt.
+        :param artist: Artist name; if specified, it will be appended to the prompt together with "trending on artstation".
+        :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, SD will adjust the dimensions of your image to make it so.
+        :param batch_size: This variable sets the number of still images you want SD to create for each prompt.
+        :param num_inference_steps: The number of inference steps.
+        :param guidance_scale: Increases adherence to the conditional signal (the text prompt) as well as overall sample quality.
+        :param enable_fp16: Whether to use float16.
+        :param seed: Random seed; if not set, results will vary from run to run.
+        :param display_rate: How often, in diffusion steps, intermediate results are decoded, displayed and saved.
+        :param use_gpu: Whether to use GPU or not.
+        :param output_dir: Output directory.
+        :return: a DocumentArray object with one Document per prompt; the generated images are stored in each Document's chunks.
+        """
+        if seed:
+            np.random.seed(seed)
+            random.seed(seed)
+            paddle.seed(seed)
+
+        if use_gpu:
+            try:
+                _places = os.environ.get("CUDA_VISIBLE_DEVICES", None)
+                if _places:
+                    paddle.device.set_device("gpu:{}".format(0))
+            except Exception:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES as cuda_device_id."
+                )
+        else:
+            paddle.device.set_device("cpu")
+        paddle.disable_static()
+
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir, exist_ok=True)
+
+        if isinstance(text_prompts, str):
+            text_prompts = text_prompts.rstrip(',.,。')
+            if style is not None:
+                text_prompts += ",{}".format(style)
+            if artist is not None:
+                text_prompts += ",{},trending on artstation".format(artist)
+            text_prompts = [text_prompts]
+        elif isinstance(text_prompts, list):
+            for i, prompt in enumerate(
+                    text_prompts):  # unlike disco diffusion, each image here is driven by a single prompt without per-prompt weights.
+ text_prompts[i] = prompt.rstrip(',.,。') + if style is not None: + text_prompts[i] += ",{}".format(style) + if artist is not None: + text_prompts[i] += ",{},trending on artstation".format(artist) + + width, height = width_height + da_batches = DocumentArray() + + for prompt in text_prompts: + d = Document(tags={'prompt': prompt}) + da_batches.append(d) + for i in range(batch_size): + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': i})) + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': 'merged'})) + with paddle.amp.auto_cast(enable=enable_fp16, level='O2'): + prompts = [prompt] * batch_size + text_input = tokenize(prompts) + text_embeddings = self.text_encoder(text_input) + uncond_input = tokenize([""] * batch_size) + uncond_embeddings = self.text_encoder(uncond_input) + text_embeddings = paddle.concat([uncond_embeddings, text_embeddings]) + + latents = paddle.randn((batch_size, self.unet.in_channels, height // 8, width // 8), ) + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = latents * self.scheduler.sigmas[0] + + self.scheduler.set_timesteps(num_inference_steps) + for i, t in tqdm(enumerate(self.scheduler.timesteps)): + # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes. + latent_model_input = paddle.concat([latents] * 2) + + if isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[i] + latent_model_input = latent_model_input / ((sigma**2 + 1)**0.5) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"] + + # perform guidance + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = self.scheduler.step(noise_pred, i, latents)["prev_sample"] + else: + latents = self.scheduler.step(noise_pred, t, latents)["prev_sample"] + if i % display_rate == 0: + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-progress.png')) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(merge_image) + d.chunks[-1].chunks.append(c) + display.clear_output(wait=True) + display.display(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(image) + d.chunks[j].chunks.append(c) + + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-merge.png')) + display.clear_output(wait=True) + display.display(merge_image) + d.load_pil_image_to_datauri(merge_image) + d.chunks[-1].load_pil_image_to_datauri(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) 
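+                # save each final image in the batch to `output_dir` and attach it to its per-image chunk Document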
+ for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + image.save(os.path.join(output_dir, f'{prompt}-image-{j}.png')) + d.chunks[j].load_pil_image_to_datauri(image) + return da_batches + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + width_height=args.width_height, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + enable_fp16=args.enable_fp16, + seed=args.seed, + display_rate=args.display_rate, + use_gpu=args.use_gpu, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_input_group.add_argument('--num_inference_steps', + type=int, + default=50, + help="The number of inference steps.") + + self.arg_input_group.add_argument( + '--guidance_scale', + type=float, + default=7.5, + help= + "Increase the adherence to the conditional signal which in this case is text as well as overall sample quality." + ) + + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed." + ) + + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help="During a diffusion run, you can monitor the progress of each image being created with this variable.") + + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + + self.arg_config_group.add_argument('--enable_fp16', + type=ast.literal_eval, + default=False, + help="whether use float16 or not") + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='stable_diffusion_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. 
Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using SD, to get a feel for how text gets translated into images by generative tools. These other apps use different technologies, but many of the same principles apply.'
+        )
+        self.arg_input_group.add_argument(
+            '--style',
+            type=str,
+            default=None,
+            help='Image style, such as oil paintings; if specified, it will be appended to the prompt.')
+        self.arg_input_group.add_argument('--artist',
+                                          type=str,
+                                          default=None,
+                                          help='Artist name; if specified, it will be appended to the prompt.')
+
+        self.arg_input_group.add_argument(
+            '--width_height',
+            type=ast.literal_eval,
+            default=[512, 512],
+            help=
+            "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, SD will adjust the dimensions of your image to make it so."
+        )
+        self.arg_input_group.add_argument(
+            '--batch_size',
+            type=int,
+            default=1,
+            help="This variable sets the number of still images you want SD to create for each prompt.")
diff --git a/modules/image/text_to_image/stable_diffusion/requirements.txt b/modules/image/text_to_image/stable_diffusion/requirements.txt
new file mode 100644
index 000000000..45e6baa06
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion/requirements.txt
@@ -0,0 +1,7 @@
+numpy
+ftfy
+regex
+docarray>=0.13.29
+pyyaml
+tqdm
+ipywidgets

From 6b5230dcedb115e548e49982d8f889fc2d6dac69 Mon Sep 17 00:00:00 2001
From: DanielYang
Date: Fri, 2 Sep 2022 10:42:52 +0800
Subject: [PATCH 042/117] Update README.md

---
 README.md | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 256d4780e..897acd88a 100644
--- a/README.md
+++ b/README.md
@@ -11,35 +11,23 @@ English | [简体中文](README_ch.md)

- -

-

- - -

- -## ⭐Introduction -- **PaddleHub** aims to provide developers with rich, high-quality, and directly usable pre-trained models. -- **Abundant Pre-trained Models**: 360+ pre-trained models cover the 6 major categories, including Wenxin large models, Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. -- **No Need for Deep Learning Background**: you can use AI models quickly and enjoy the dividends of the artificial intelligence era. -- **Quick Model Prediction**: model prediction can be realized through a few lines of scripts to quickly experience the model effect. -- **Model As Service**: one-line command to build deep learning model API service deployment capabilities. -- **Easy-to-use Transfer Learning**: few lines of codes to complete the transfer-learning task such as image classification and text classification based on high quality pre-trained models. -- **Cross-platform**: support Linux, Windows, MacOS and other operating systems. +## ⭐Features +- **400+ AI Models**: rich, high-quality AI models, including CV, NLP, and Speech. +- **Easy to Use**: 3 lines of code to predict the 400+ AI models +- **Model As Service**: easy to build a service with only one line of command. +- **Cross-platform**: support Linux, Windows, MacOS ### 💥Recent Updates -- **🔥2022.08.19:** The v2.3.0 version is released, supports Wenxin large models and five text-to-image models based on disco diffusion(dd). - - Support [Wenxin large models API](https://wenxin.baidu.com/moduleApi) for Baidu ERNIE large-scale pre-trained model, including [**ERNIE-ViLG** model](https://aistudio.baidu.com/aistudio/projectdetail/4445016), which supports text-to-image task, and [**ERNIE 3.0 Zeus**](https://aistudio.baidu.com/aistudio/projectdetail/4445054) model, which supports applications such as writing essays, summarization, couplets, question answering, writing novels and completing text. - - Add five text-to-image domain models based on disco diffusion(dd), three for [English](https://aistudio.baidu.com/aistudio/projectdetail/4444984) and two for Chinese. Welcome to enjoy our **ERNIE-ViL**-based Chinese text-to-image module [disco_diffusion_ernievil_base](https://aistudio.baidu.com/aistudio/projectdetail/4444998) in aistudio. +- **🔥2022.08.19:** The v2.3.0 version is released, supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)、[**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) - **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) - **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. 
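As a concrete reference for the "3 lines of code" and one-line serving claims in the Features section above, typical PaddleHub usage looks roughly like this (a sketch using the `lac` lexical analysis module that also appears in the quick-start section of this README; any module name from the list works the same way):

```python
import paddlehub as hub

lac = hub.Module(name="lac")               # download and load a pretrained module
results = lac.cut(text=["今天是个好日子"])   # run prediction with one call
print(results)
```

Serving the same module is then the one-line command `hub serving start -m lac`, as shown later in the quick-start section.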
From 9fe874167db2b344423764e1a66f21bfc5cf3a53 Mon Sep 17 00:00:00 2001 From: DanielYang Date: Fri, 2 Sep 2022 10:46:07 +0800 Subject: [PATCH 043/117] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 897acd88a..8e44ddbe3 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,10 @@ English | [简体中文](README_ch.md) - **Cross-platform**: support Linux, Windows, MacOS ### 💥Recent Updates -- **🔥2022.08.19:** The v2.3.0 version is released, supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)、[**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) +- **🔥2022.08.19:** The v2.3.0 version is released + - supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)([Hugging Face Space Demo](https://huggingface.co/spaces/PaddlePaddle/ERNIE-ViLG)) + - supports [**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) + - **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) - **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. From f66835aa330f38a58f454fa762f24be2e37ce95d Mon Sep 17 00:00:00 2001 From: Zeyu Chen Date: Fri, 2 Sep 2022 10:56:19 +0800 Subject: [PATCH 044/117] Update README.md --- README.md | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8e44ddbe3..e0c7a2a66 100644 --- a/README.md +++ b/README.md @@ -21,29 +21,24 @@ English | [简体中文](README_ch.md) ## ⭐Features -- **400+ AI Models**: rich, high-quality AI models, including CV, NLP, and Speech. -- **Easy to Use**: 3 lines of code to predict the 400+ AI models -- **Model As Service**: easy to build a service with only one line of command. -- **Cross-platform**: support Linux, Windows, MacOS +- **📦400+ AI Models**: Rich, high-quality AI models, including CV, NLP, Speech, Video and Cross-Modal. +- **🧒Easy to Use**: 3 lines of code to predict the 400+ AI models +- **💁Model As Service**: Easy to build a service with only one line of command. +- **💻Cross-platform**: Support Linux, Windows and MacOS ### 💥Recent Updates - **🔥2022.08.19:** The v2.3.0 version is released - supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)([Hugging Face Space Demo](https://huggingface.co/spaces/PaddlePaddle/ERNIE-ViLG)) - supports [**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) -- **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) -- **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). 
[2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. - -- [【more】](./docs/docs_en/release.md) - - +- **2022.02.18:** Release models to the HuggingFace PaddlePaddle Space: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) +- [**More**](./docs/docs_en/release.md) ## 🌈Visualization Demo - #### 🏜️ [Text-to-Image Models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage) -- Include ERNIE-ViL、ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text. +- Include ERNIE-ViL, ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text.
From f7c781b2858d537e1f2e9588431e0f4cf04c4fc3 Mon Sep 17 00:00:00 2001 From: DanielYang Date: Fri, 2 Sep 2022 11:09:58 +0800 Subject: [PATCH 045/117] Update README.md --- README.md | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index e0c7a2a66..ba4611488 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ English | [简体中文](README_ch.md)

------------------------------------------------------------------------------------------ @@ -35,6 +35,8 @@ English | [简体中文](README_ch.md) - [**More**](./docs/docs_en/release.md) + + ## 🌈Visualization Demo #### 🏜️ [Text-to-Image Models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage) @@ -62,8 +64,6 @@ English | [简体中文](README_ch.md) #### 🎧 [Speech Models](./modules#Audio) -- ASR speech recognition algorithm, multiple algorithms are available. -- The speech recognition effect is as follows:
@@ -90,33 +90,21 @@ English | [简体中文](README_ch.md)
- -- TTS speech synthesis algorithm, multiple algorithms are available. -- Input: `Life was like a box of chocolates, you never know what you're gonna get.` -- The synthesis effect is as follows:
- - - + + - + -
deepvoice3 fastspeech transformerInput Text Output Audio
- -
-
Life was like a box of chocolates, you never know what you're gonna get.
- -
-
@@ -124,16 +112,9 @@ English | [简体中文](README_ch.md) - Many thanks to CopyRight@[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech) for the pre-trained models, you can try to train your models with PaddleSpeech. -#### 📽️ [Video Models](./modules#Video) -- Short video classification trained via large-scale video datasets, supports 3000+ tag types prediction for short Form Videos. -- Many thanks to CopyRight@[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) for the pre-trained model, you can try to train your models with PaddleVideo. -- `Example: Input a short video of swimming, the algorithm can output the result of "swimming"` -
- -
-### ⭐ Thanks for Your Star ⭐
-- All the above pre-trained models are all **open source and free**, and the number of models is continuously updated. Welcome **⭐Star⭐** to pay attention.
+### ⭐ Thanks for Your Star
+- All the above pre-trained models are **open source and free**, and the number of models is continuously updated. Welcome to **Star** the repo to follow the updates.
@@ -146,7 +127,8 @@ English | [简体中文](README_ch.md) - If you have any questions during the use of the model, you can join the official WeChat group to get more efficient questions and answers, and fully communicate with developers from all walks of life. We look forward to your joining.
-
+
+ - please add WeChat above and send "Hub" to the robot, the robot will invite you to join the group automatically. @@ -183,9 +165,7 @@ print(results) !hub serving start -m lac ``` -- 📣More model description, please refer [Models List](https://www.paddlepaddle.org.cn/hublist) - -- 📣More API for transfer learning, please refer [Tutorial](https://paddlehub.readthedocs.io/en/release-v2.1/transfer_learning_index.html) +- 📣More model description, please refer [Models List](./modules) ## 📚License From 7cf4b9de2f5153783272f1f5d51b7d80fec2bd79 Mon Sep 17 00:00:00 2001 From: Zeyu Chen Date: Fri, 2 Sep 2022 11:47:20 +0800 Subject: [PATCH 046/117] Update README.md --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ba4611488..8a8a99f95 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ English | [简体中文](README_ch.md)

- +

QuickStart | Models List | Demos

@@ -22,18 +22,18 @@ English | [简体中文](README_ch.md) ## ⭐Features - **📦400+ AI Models**: Rich, high-quality AI models, including CV, NLP, Speech, Video and Cross-Modal. -- **🧒Easy to Use**: 3 lines of code to predict the 400+ AI models +- **🧒Easy to Use**: 3 lines of code to predict the 400+ AI models. - **💁Model As Service**: Easy to build a service with only one line of command. -- **💻Cross-platform**: Support Linux, Windows and MacOS +- **💻Cross-platform**: Support Linux, Windows and MacOS. ### 💥Recent Updates -- **🔥2022.08.19:** The v2.3.0 version is released +- **🔥2022.08.19:** The v2.3.0 version is released 🎉 - supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)([Hugging Face Space Demo](https://huggingface.co/spaces/PaddlePaddle/ERNIE-ViLG)) - supports [**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) -- **2022.02.18:** Release models to the HuggingFace PaddlePaddle Space: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) +- **2022.02.18:** Release models to HuggingFace [PaddlePaddle Space](https://huggingface.co/PaddlePaddle) -- [**More**](./docs/docs_en/release.md) +- For more previous release please refer to [**PaddleHub Release Note**](./docs/docs_en/release.md) @@ -51,7 +51,7 @@ English | [简体中文](README_ch.md)
-- Many thanks to CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)、[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN)、[AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)、[Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) for the pre-trained models, you can try to train your models with them. +- Many thanks to CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN), [AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg), [Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) for the pre-trained models, you can try to train your models with them. #### 🎤 [Natural Language Processing Models](./modules#Text) From e03d11260b86a2f49f896e733d68e82edb0d9286 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 2 Sep 2022 15:17:37 +0800 Subject: [PATCH 047/117] Add baidu translate module --- .../baidu_translate/README.md | 114 ++++++++++++++++++ .../baidu_translate/module.py | 104 ++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 modules/text/machine_translation/baidu_translate/README.md create mode 100644 modules/text/machine_translation/baidu_translate/module.py diff --git a/modules/text/machine_translation/baidu_translate/README.md b/modules/text/machine_translation/baidu_translate/README.md new file mode 100644 index 000000000..5b93ae3e5 --- /dev/null +++ b/modules/text/machine_translation/baidu_translate/README.md @@ -0,0 +1,114 @@ +# baidu_translate +|模型名称|baidu_translate| +| :--- | :---: | +|类别|文本-机器翻译| +|网络|-| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|-| +|最新更新日期|2022-09-01| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 本模块提供百度翻译开放平台的服务,可支持多语种互译。您只需要通过传入待翻译的内容,并指定要翻译的源语言(支持源语言语种自动检测)和目标语言种类,就可以得到相应的翻译结果。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.3.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install baidu_translate + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name='baidu_translate') + result = module.translate("I like panda") + print(result) + ``` + +- ### 2、API + + - ```python + def translate(query: str, + from_lang: Optional[str] = "en", + to_lang: Optional[int] = "zh") + ``` + + - 翻译API,输入源语言的文本句子,解码后输出翻译后的目标语言的文本句子。 + + - **参数** + + - `query`(str): 待翻译的语言。 + - `from_lang`(int): 源语言。 + - `to_lang`(int): 目标语言。 + + - **返回** + + - `result`(str): 翻译后的目标语言句子。 + + 源语言和目标语言都采用ISO 639-1语言编码标准来表示,常用的语言编码如下, 更多语言表示可以参考[文档](https://fanyi-api.baidu.com/doc/21)。 +

+ + +## 四、服务部署 + +- 通过启动PaddleHub Serving,可以加载模型部署在线翻译服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m baidu_translate + ``` + + - 通过以上命令可完成一个翻译API的部署,默认端口号为8866。 + + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + text = "I like panda" + data = {"query": text, "from_lang":'en', "to_lang":'zh'} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/baidu_translate" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install baidu_translate==1.0.0 + ``` diff --git a/modules/text/machine_translation/baidu_translate/module.py b/modules/text/machine_translation/baidu_translate/module.py new file mode 100644 index 000000000..f19d8f92a --- /dev/null +++ b/modules/text/machine_translation/baidu_translate/module.py @@ -0,0 +1,104 @@ +import argparse +import random +from hashlib import md5 +from typing import Optional + +import requests + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def make_md5(s, encoding='utf-8'): + return md5(s.encode(encoding)).hexdigest() + + +@moduleinfo(name="baidu_translate", + version="1.0.0", + type="text/machine_translation", + summary="", + author="baidu-nlp", + author_email="paddle-dev@baidu.com") +class BaiduTranslate: + + def __init__(self, appid=None, appkey=None): + """ + :param appid: appid for requesting Baidu translation service. + :param appkey: appkey for requesting Baidu translation service. + """ + # Set your own appid/appkey. + if appid == None: + self.appid = '20201015000580007' + else: + self.appid = appid + if appkey is None: + self.appkey = 'IFJB6jBORFuMmVGDRud1' + else: + self.appkey = appkey + self.url = 'http://api.fanyi.baidu.com/api/trans/vip/translate' + + def translate(self, query: str, from_lang: Optional[str] = "en", to_lang: Optional[int] = "zh"): + """ + Create image by text prompts using ErnieVilG model. + + :param query: Text to be translated. + :param from_lang: Source language. + :param to_lang: Dst language. + + Return translated string. + """ + # Generate salt and sign + salt = random.randint(32768, 65536) + sign = make_md5(self.appid + query + str(salt) + self.appkey) + + # Build request + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + payload = {'appid': self.appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign} + + # Send request + try: + r = requests.post(self.url, params=payload, headers=headers) + result = r.json() + except Exception as e: + error_msg = str(e) + raise RuntimeError(error_msg) + if 'error_code' in result: + raise RuntimeError(result['error_msg']) + return result['trans_result'][0]['dst'] + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.appid is not None and args.appkey is not None: + self.appid = args.appid + self.appkey = args.appkey + result = self.translate(args.query, args.from_lang, args.to_lang) + return result + + @serving + def serving_method(self, query, from_lang, to_lang): + """ + Run as a service. + """ + return self.translate(query, from_lang, to_lang) + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--query', type=str) + self.arg_input_group.add_argument('--from_lang', type=str, default='en', help="源语言") + self.arg_input_group.add_argument('--to_lang', type=str, default='zh', help="目标语言") + self.arg_input_group.add_argument('--appid', type=str, default=None, help="注册得到的个人appid") + self.arg_input_group.add_argument('--appkey', type=str, default=None, help="注册得到的个人appkey") From 8fd2696f9c4af91fa811def055aadf65b59129bb Mon Sep 17 00:00:00 2001 From: Zeyu Chen Date: Fri, 2 Sep 2022 17:03:56 +0800 Subject: [PATCH 048/117] Update README.md --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8a8a99f95..280cd50eb 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,8 @@ English | [简体中文](README_ch.md)

- ------------------------------------------------------------------------------------------

@@ -22,14 +21,14 @@ English | [简体中文](README_ch.md) ## ⭐Features - **📦400+ AI Models**: Rich, high-quality AI models, including CV, NLP, Speech, Video and Cross-Modal. -- **🧒Easy to Use**: 3 lines of code to predict the 400+ AI models. -- **💁Model As Service**: Easy to build a service with only one line of command. +- **🧒Easy to Use**: 3 lines of code to predict 400+ AI models. +- **💁Model As Service**: Easy to serve model with only one line of command. - **💻Cross-platform**: Support Linux, Windows and MacOS. ### 💥Recent Updates - **🔥2022.08.19:** The v2.3.0 version is released 🎉 - - supports [**ERNIE_ViLG**](./modules/image/text_to_image/ernie_vilg)([Hugging Face Space Demo](https://huggingface.co/spaces/PaddlePaddle/ERNIE-ViLG)) - - supports [**Disco Diffusion(DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion(SD)**](./modules/image/text_to_image/stable_diffusion) + - Supports [**ERNIE-ViLG**](./modules/image/text_to_image/ernie_vilg)([HuggingFace Space Demo](https://huggingface.co/spaces/PaddlePaddle/ERNIE-ViLG)) + - Supports [**Disco Diffusion (DD)**](./modules/image/text_to_image/disco_diffusion_clip_vitb32) and [**Stable Diffusion (SD)**](./modules/image/text_to_image/stable_diffusion) - **2022.02.18:** Release models to HuggingFace [PaddlePaddle Space](https://huggingface.co/PaddlePaddle) @@ -40,7 +39,7 @@ English | [简体中文](README_ch.md) ## 🌈Visualization Demo #### 🏜️ [Text-to-Image Models](https://www.paddlepaddle.org.cn/hubdetail?name=ernie_vilg&en_category=TextToImage) -- Include ERNIE-ViL, ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text. +- Include ERNIE-ViLG, ERNIE-ViL, ERNIE 3.0 Zeus, supports applications such as text-to-image, writing essays, summarization, couplets, question answering, writing novels and completing text.

From bd1f20e6adb543f6894ee3bed28c721717137b92 Mon Sep 17 00:00:00 2001 From: Zeyu Chen Date: Fri, 2 Sep 2022 17:04:11 +0800 Subject: [PATCH 049/117] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 280cd50eb..428900a1a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ English | [简体中文](README_ch.md) + ------------------------------------------------------------------------------------------

From 2e727825a3b2ee45358b42ee5c12967d7fdb4595 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Wed, 14 Sep 2022 14:23:29 +0800 Subject: [PATCH 050/117] fix typo --- docs/docs_ch/get_start/linux_quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs_ch/get_start/linux_quickstart.md b/docs/docs_ch/get_start/linux_quickstart.md index ebebaa448..c6f08573d 100755 --- a/docs/docs_ch/get_start/linux_quickstart.md +++ b/docs/docs_ch/get_start/linux_quickstart.md @@ -206,5 +206,5 @@ - output image ## 第6步:飞桨预训练模型探索之旅 -- 恭喜你,到这里PaddleHub在windows环境下的安装和入门案例就全部完成了,快快开启你更多的深度学习模型探索之旅吧。[【更多模型探索,跳转飞桨官网】](https://www.paddlepaddle.org.cn/hublist) +- 恭喜你,到这里PaddleHub在linux环境下的安装和入门案例就全部完成了,快快开启你更多的深度学习模型探索之旅吧。[【更多模型探索,跳转飞桨官网】](https://www.paddlepaddle.org.cn/hublist) From 27aa1eab93b9023a0ae9025f78d3878a82ecb54b Mon Sep 17 00:00:00 2001 From: DanielYang Date: Wed, 14 Sep 2022 16:23:00 +0800 Subject: [PATCH 051/117] Update README.md --- modules/README.md | 118 +++++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/modules/README.md b/modules/README.md index b1dad2373..a6b0d265a 100644 --- a/modules/README.md +++ b/modules/README.md @@ -21,14 +21,14 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| |--|--|--|--| -|[DriverStatusRecognition](image/classification/DriverStatusRecognition)|MobileNetV3_small_ssld|分心司机检测数据集|| -|[mobilenet_v2_animals](image/classification/mobilenet_v2_animals)|MobileNet_v2|百度自建动物数据集|| +|[DriverStatusRecognition](image/classification/DriverStatusRecognition)|MobileNetV3_small_ssld|Drivers|| +|[mobilenet_v2_animals](image/classification/mobilenet_v2_animals)|MobileNet_v2|Animals|| |[repvgg_a1_imagenet](image/classification/repvgg_a1_imagenet)|RepVGG|ImageNet-2012|| |[repvgg_a0_imagenet](image/classification/repvgg_a0_imagenet)|RepVGG|ImageNet-2012|| |[resnext152_32x4d_imagenet](image/classification/resnext152_32x4d_imagenet)|ResNeXt|ImageNet-2012|| |[resnet_v2_152_imagenet](image/classification/resnet_v2_152_imagenet)|ResNet V2|ImageNet-2012|| -|[resnet50_vd_animals](image/classification/resnet50_vd_animals)|ResNet50_vd|百度自建动物数据集|| -|[food_classification](image/classification/food_classification)|ResNet50_vd_ssld|美食数据集|| +|[resnet50_vd_animals](image/classification/resnet50_vd_animals)|ResNet50_vd|Animals|| +|[food_classification](image/classification/food_classification)|ResNet50_vd_ssld|dishes|| |[mobilenet_v3_large_imagenet_ssld](image/classification/mobilenet_v3_large_imagenet_ssld)|Mobilenet_v3_large|ImageNet-2012|| |[resnext152_vd_32x4d_imagenet](image/classification/resnext152_vd_32x4d_imagenet)|||| |[ghostnet_x1_3_imagenet_ssld](image/classification/ghostnet_x1_3_imagenet_ssld)|GhostNet|ImageNet-2012|| @@ -38,13 +38,13 @@ English | [简体中文](README_ch.md) |[efficientnetb0_imagenet](image/classification/efficientnetb0_imagenet)|EfficientNet|ImageNet-2012|| |[efficientnetb1_imagenet](image/classification/efficientnetb1_imagenet)|EfficientNet|ImageNet-2012|| |[mobilenet_v2_imagenet_ssld](image/classification/mobilenet_v2_imagenet_ssld)|Mobilenet_v2|ImageNet-2012|| -|[resnet50_vd_dishes](image/classification/resnet50_vd_dishes)|ResNet50_vd|百度自建菜品数据集|| +|[resnet50_vd_dishes](image/classification/resnet50_vd_dishes)|ResNet50_vd|dishes|| |[pnasnet_imagenet](image/classification/pnasnet_imagenet)|PNASNet|ImageNet-2012|| |[rexnet_2_0_imagenet](image/classification/rexnet_2_0_imagenet)|ReXNet|ImageNet-2012|| 
-|[SnakeIdentification](image/classification/SnakeIdentification)|ResNet50_vd_ssld|蛇种数据集|| +|[SnakeIdentification](image/classification/SnakeIdentification)|ResNet50_vd_ssld|snakes|| |[hrnet40_imagenet](image/classification/hrnet40_imagenet)|HRNet|ImageNet-2012|| |[resnet_v2_34_imagenet](image/classification/resnet_v2_34_imagenet)|ResNet V2|ImageNet-2012|| -|[mobilenet_v2_dishes](image/classification/mobilenet_v2_dishes)|MobileNet_v2|百度自建菜品数据集|| +|[mobilenet_v2_dishes](image/classification/mobilenet_v2_dishes)|MobileNet_v2|dishes|| |[resnext101_vd_32x4d_imagenet](image/classification/resnext101_vd_32x4d_imagenet)|ResNeXt|ImageNet-2012|| |[repvgg_b2g4_imagenet](image/classification/repvgg_b2g4_imagenet)|RepVGG|ImageNet-2012|| |[fix_resnext101_32x48d_wsl_imagenet](image/classification/fix_resnext101_32x48d_wsl_imagenet)|ResNeXt|ImageNet-2012|| @@ -56,7 +56,7 @@ English | [简体中文](README_ch.md) |[densenet161_imagenet](image/classification/densenet161_imagenet)|DenseNet|ImageNet-2012|| |[vgg19_imagenet](image/classification/vgg19_imagenet)|vgg19_imagenet|ImageNet-2012|| |[mobilenet_v2_imagenet](image/classification/mobilenet_v2_imagenet)|Mobilenet_v2|ImageNet-2012|| -|[resnet50_vd_10w](image/classification/resnet50_vd_10w)|ResNet_vd|百度自建数据集|| +|[resnet50_vd_10w](image/classification/resnet50_vd_10w)|ResNet_vd|private|| |[resnet_v2_101_imagenet](image/classification/resnet_v2_101_imagenet)|ResNet V2 101|ImageNet-2012|| |[darknet53_imagenet](image/classification/darknet53_imagenet)|DarkNet|ImageNet-2012|| |[se_resnext50_32x4d_imagenet](image/classification/se_resnext50_32x4d_imagenet)|SE_ResNeXt|ImageNet-2012|| @@ -108,7 +108,7 @@ English | [简体中文](README_ch.md) |[efficientnetb5_imagenet](image/classification/efficientnetb5_imagenet)|EfficientNet|ImageNet-2012|| |[repvgg_b1g2_imagenet](image/classification/repvgg_b1g2_imagenet)|RepVGG|ImageNet-2012|| |[resnext101_32x48d_wsl](image/classification/resnext101_32x48d_wsl)|ResNeXt_wsl|ImageNet-2012|| -|[resnet50_vd_wildanimals](image/classification/resnet50_vd_wildanimals)|ResNet_vd|IFAW 自建野生动物数据集|| +|[resnet50_vd_wildanimals](image/classification/resnet50_vd_wildanimals)|ResNet_vd|IFAW wild animals|| |[nasnet_imagenet](image/classification/nasnet_imagenet)|NASNet|ImageNet-2012|| |[se_resnet18_vd_imagenet](image/classification/se_resnet18_vd_imagenet)|||| |[spinalnet_res50_gemstone](image/classification/spinalnet_res50_gemstone)|resnet50|gemstone|| @@ -137,61 +137,61 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| Huggingface Spaces Demo| |--|--|--|--|--| -|[pixel2style2pixel](image/Image_gan/gan/pixel2style2pixel/)|Pixel2Style2Pixel|-|人脸转正| -|[stgan_bald](image/Image_gan/gan/stgan_bald/)|STGAN|CelebA|秃头生成器| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/stgan_bald) | -|[styleganv2_editing](image/Image_gan/gan/styleganv2_editing)|StyleGAN V2|-|人脸编辑| -|[wav2lip](image/Image_gan/gan/wav2lip)|wav2lip|LRS2|唇形生成| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/wav2lip) | -|[attgan_celeba](image/Image_gan/attgan_celeba/)|AttGAN|Celeba|人脸编辑| -|[cyclegan_cityscapes](image/Image_gan/cyclegan_cityscapes)|CycleGAN|Cityscapes|实景图和语义分割结果互相转换| -|[stargan_celeba](image/Image_gan/stargan_celeba)|StarGAN|Celeba|人脸编辑| -|[stgan_celeba](image/Image_gan/stgan_celeba/)|STGAN|Celeba|人脸编辑| -|[ID_Photo_GEN](image/Image_gan/style_transfer/ID_Photo_GEN)|HRNet_W18|-|证件照生成| 
-|[Photo2Cartoon](image/Image_gan/style_transfer/Photo2Cartoon)|U-GAT-IT|cartoon_data|人脸卡通化|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/photo2cartoon) | -|[U2Net_Portrait](image/Image_gan/style_transfer/U2Net_Portrait)|U^2Net|-|人脸素描化| -|[UGATIT_100w](image/Image_gan/style_transfer/UGATIT_100w)|U-GAT-IT|selfie2anime|人脸动漫化| -|[UGATIT_83w](image/Image_gan/style_transfer/UGATIT_83w)|U-GAT-IT|selfie2anime|人脸动漫化| -|[UGATIT_92w](image/Image_gan/style_transfer/UGATIT_92w)| U-GAT-IT|selfie2anime|人脸动漫化| -|[animegan_v1_hayao_60](image/Image_gan/style_transfer/animegan_v1_hayao_60)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v1_hayao_60) | -|[animegan_v2_hayao_64](image/Image_gan/style_transfer/animegan_v2_hayao_64)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_hayao_64) | -|[animegan_v2_hayao_99](image/Image_gan/style_transfer/animegan_v2_hayao_99)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_hayao_99) | -|[animegan_v2_paprika_54](image/Image_gan/style_transfer/animegan_v2_paprika_54)|AnimeGAN|Paprika|图像风格迁移-今敏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_54) | -|[animegan_v2_paprika_74](image/Image_gan/style_transfer/animegan_v2_paprika_74)|AnimeGAN|Paprika|图像风格迁移-今敏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_74) | -|[animegan_v2_paprika_97](image/Image_gan/style_transfer/animegan_v2_paprika_97)|AnimeGAN|Paprika|图像风格迁移-今敏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_97) | -|[animegan_v2_paprika_98](image/Image_gan/style_transfer/animegan_v2_paprika_98)|AnimeGAN|Paprika|图像风格迁移-今敏| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_98) | -|[animegan_v2_shinkai_33](image/Image_gan/style_transfer/animegan_v2_shinkai_33)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_shinkai_33) | -|[animegan_v2_shinkai_53](image/Image_gan/style_transfer/animegan_v2_shinkai_53)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_shinkai_53) | +|[pixel2style2pixel](image/Image_gan/gan/pixel2style2pixel/)|Pixel2Style2Pixel|-|human face| +|[stgan_bald](image/Image_gan/gan/stgan_bald/)|STGAN|CelebA|stgan_bald| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/stgan_bald) | +|[styleganv2_editing](image/Image_gan/gan/styleganv2_editing)|StyleGAN V2|-|human face editing| 
+|[wav2lip](image/Image_gan/gan/wav2lip)|wav2lip|LRS2|wav2lip| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/wav2lip) | +|[attgan_celeba](image/Image_gan/attgan_celeba/)|AttGAN|Celeba|human face editing| +|[cyclegan_cityscapes](image/Image_gan/cyclegan_cityscapes)|CycleGAN|Cityscapes|cyclegan_cityscapes| +|[stargan_celeba](image/Image_gan/stargan_celeba)|StarGAN|Celeba|human face editing| +|[stgan_celeba](image/Image_gan/stgan_celeba/)|STGAN|Celeba|human face editing| +|[ID_Photo_GEN](image/Image_gan/style_transfer/ID_Photo_GEN)|HRNet_W18|-|ID_Photo_GEN| +|[Photo2Cartoon](image/Image_gan/style_transfer/Photo2Cartoon)|U-GAT-IT|cartoon_data|cartoon|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/photo2cartoon) | +|[U2Net_Portrait](image/Image_gan/style_transfer/U2Net_Portrait)|U^2Net|-|Portrait| +|[UGATIT_100w](image/Image_gan/style_transfer/UGATIT_100w)|U-GAT-IT|selfie2anime|selfie2anime| +|[UGATIT_83w](image/Image_gan/style_transfer/UGATIT_83w)|U-GAT-IT|selfie2anime|selfie2anime| +|[UGATIT_92w](image/Image_gan/style_transfer/UGATIT_92w)| U-GAT-IT|selfie2anime|selfie2anime| +|[animegan_v1_hayao_60](image/Image_gan/style_transfer/animegan_v1_hayao_60)|AnimeGAN|The Wind Rises|animegan_v1_hayao| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v1_hayao_60) | +|[animegan_v2_hayao_64](image/Image_gan/style_transfer/animegan_v2_hayao_64)|AnimeGAN|The Wind Rises|animegan_v1_hayao| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_hayao_64) | +|[animegan_v2_hayao_99](image/Image_gan/style_transfer/animegan_v2_hayao_99)|AnimeGAN|The Wind Rises|animegan_v1_hayao| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_hayao_99) | +|[animegan_v2_paprika_54](image/Image_gan/style_transfer/animegan_v2_paprika_54)|AnimeGAN|Paprika|animegan_v2_paprika| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_54) | +|[animegan_v2_paprika_74](image/Image_gan/style_transfer/animegan_v2_paprika_74)|AnimeGAN|Paprika|animegan_v2_paprika| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_74) | +|[animegan_v2_paprika_97](image/Image_gan/style_transfer/animegan_v2_paprika_97)|AnimeGAN|Paprika|animegan_v2_paprika| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_97) | +|[animegan_v2_paprika_98](image/Image_gan/style_transfer/animegan_v2_paprika_98)|AnimeGAN|Paprika|animegan_v2_paprika| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_paprika_98) | +|[animegan_v2_shinkai_33](image/Image_gan/style_transfer/animegan_v2_shinkai_33)|AnimeGAN|Your Name, Weathering with you|animegan_v2_shinkai| [![Hugging Face 
Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_shinkai_33) | +|[animegan_v2_shinkai_53](image/Image_gan/style_transfer/animegan_v2_shinkai_53)|AnimeGAN|Your Name, Weathering with you|animegan_v2_shinkai| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/animegan_v2_shinkai_53) | |[msgnet](image/Image_gan/style_transfer/msgnet)|msgnet|COCO2014| |[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/msgnet) | -|[stylepro_artistic](image/Image_gan/style_transfer/stylepro_artistic)|StyleProNet|MS-COCO + WikiArt|艺术风格迁移| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/stylepro_artistic) | -|stylegan_ffhq|StyleGAN|FFHQ|图像风格迁移| +|[stylepro_artistic](image/Image_gan/style_transfer/stylepro_artistic)|StyleProNet|MS-COCO + WikiArt|stylepro_artistic| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/stylepro_artistic) | +|stylegan_ffhq|StyleGAN|FFHQ|stylepro_artistic| - ### Keypoint Detection |module|Network|Dataset|Introduction| |--|--|--|--| -|[face_landmark_localization](image/keypoint_detection/face_landmark_localization)|Face_Landmark|AFW/AFLW|人脸关键点检测| -|[hand_pose_localization](image/keypoint_detection/hand_pose_localization)|-|MPII, NZSL|手部关键点检测| -|[openpose_body_estimation](image/keypoint_detection/openpose_body_estimation)|two-branch multi-stage CNN|MPII, COCO 2016|肢体关键点检测| -|[human_pose_estimation_resnet50_mpii](image/keypoint_detection/human_pose_estimation_resnet50_mpii)|Pose_Resnet50|MPII|人体骨骼关键点检测 -|[openpose_hands_estimation](image/keypoint_detection/openpose_hands_estimation)|-|MPII, NZSL|手部关键点检测| +|[face_landmark_localization](image/keypoint_detection/face_landmark_localization)|Face_Landmark|AFW/AFLW|Face_Landmark| +|[hand_pose_localization](image/keypoint_detection/hand_pose_localization)|-|MPII, NZSL|hand_pose_localization| +|[openpose_body_estimation](image/keypoint_detection/openpose_body_estimation)|two-branch multi-stage CNN|MPII, COCO 2016|openpose_body_estimation| +|[human_pose_estimation_resnet50_mpii](image/keypoint_detection/human_pose_estimation_resnet50_mpii)|Pose_Resnet50|MPII|human_pose_estimation +|[openpose_hands_estimation](image/keypoint_detection/openpose_hands_estimation)|-|MPII, NZSL|openpose_hands_estimation| - ### Semantic Segmentation |module|Network|Dataset|Introduction| |--|--|--|--| -|[deeplabv3p_xception65_humanseg](image/semantic_segmentation/deeplabv3p_xception65_humanseg)|deeplabv3p|百度自建数据集|人像分割| -|[humanseg_server](image/semantic_segmentation/humanseg_server)|deeplabv3p|百度自建数据集|人像分割| -|[humanseg_mobile](image/semantic_segmentation/humanseg_mobile)|hrnet|百度自建数据集|人像分割-移动端前置摄像头| -|[humanseg_lite](image/semantic_segmentation/umanseg_lite)|shufflenet|百度自建数据集|轻量级人像分割-移动端实时| -|[ExtremeC3_Portrait_Segmentation](image/semantic_segmentation/ExtremeC3_Portrait_Segmentation)|ExtremeC3|EG1800, Baidu fashion dataset|轻量化人像分割| -|[SINet_Portrait_Segmentation](image/semantic_segmentation/SINet_Portrait_Segmentation)|SINet|EG1800, Baidu fashion dataset|轻量化人像分割| -|[FCN_HRNet_W18_Face_Seg](image/semantic_segmentation/FCN_HRNet_W18_Face_Seg)|FCN_HRNet_W18|-|人像分割| -|[ace2p](image/semantic_segmentation/ace2p)|ACE2P|LIP|人体解析| 
-|[Pneumonia_CT_LKM_PP](image/semantic_segmentation/Pneumonia_CT_LKM_PP)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析| -|[Pneumonia_CT_LKM_PP_lung](image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析| +|[deeplabv3p_xception65_humanseg](image/semantic_segmentation/deeplabv3p_xception65_humanseg)|deeplabv3p|-|humanseg| +|[humanseg_server](image/semantic_segmentation/humanseg_server)|deeplabv3p|-|humanseg| +|[humanseg_mobile](image/semantic_segmentation/humanseg_mobile)|hrnet|-|humanseg| +|[humanseg_lite](image/semantic_segmentation/umanseg_lite)|shufflenet|-|humanseg| +|[ExtremeC3_Portrait_Segmentation](image/semantic_segmentation/ExtremeC3_Portrait_Segmentation)|ExtremeC3|EG1800, Baidu fashion dataset|humanseg| +|[SINet_Portrait_Segmentation](image/semantic_segmentation/SINet_Portrait_Segmentation)|SINet|EG1800, Baidu fashion dataset|humanseg| +|[FCN_HRNet_W18_Face_Seg](image/semantic_segmentation/FCN_HRNet_W18_Face_Seg)|FCN_HRNet_W18|-|humanseg| +|[ace2p](image/semantic_segmentation/ace2p)|ACE2P|LIP|ACE2P| +|[Pneumonia_CT_LKM_PP](image/semantic_segmentation/Pneumonia_CT_LKM_PP)|U-NET+|-|Pneumonia_CT| +|[Pneumonia_CT_LKM_PP_lung](image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung)|U-NET+|-|Pneumonia_CT| |[ocrnet_hrnetw18_voc](image/semantic_segmentation/ocrnet_hrnetw18_voc)|ocrnet, hrnet|PascalVoc2012| -|[U2Net](image/semantic_segmentation/U2Net)|U^2Net|-|图像前景背景分割| -|[U2Netp](image/semantic_segmentation/U2Netp)|U^2Net|-|图像前景背景分割| -|[Extract_Line_Draft](image/semantic_segmentation/Extract_Line_Draft)|UNet|Pixiv|线稿提取| +|[U2Net](image/semantic_segmentation/U2Net)|U^2Net|-|U2Net| +|[U2Netp](image/semantic_segmentation/U2Netp)|U^2Net|-|U2Net| +|[Extract_Line_Draft](image/semantic_segmentation/Extract_Line_Draft)|UNet|Pixiv|Extract_Line_Draft| |[unet_cityscapes](image/semantic_segmentation/unet_cityscapes)|UNet|cityscapes| |[ocrnet_hrnetw18_cityscapes](image/semantic_segmentation/ocrnet_hrnetw18_cityscapes)|ocrnet_hrnetw18|cityscapes| |[hardnet_cityscapes](image/semantic_segmentation/hardnet_cityscapes)|hardnet|cityscapes| @@ -210,13 +210,13 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| |--|--|--|--| -|[pyramidbox_lite_mobile](image/face_detection/pyramidbox_lite_mobile)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测-移动端| -|[pyramidbox_lite_mobile_mask](image/face_detection/pyramidbox_lite_mobile_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测-移动端| -|[pyramidbox_lite_server_mask](image/face_detection/pyramidbox_lite_server_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测| -|[ultra_light_fast_generic_face_detector_1mb_640](image/face_detection/ultra_light_fast_generic_face_detector_1mb_640)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备| -|[ultra_light_fast_generic_face_detector_1mb_320](image/face_detection/ultra_light_fast_generic_face_detector_1mb_320)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备| -|[pyramidbox_lite_server](image/face_detection/pyramidbox_lite_server)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测| -|[pyramidbox_face_detection](image/face_detection/pyramidbox_face_detection)|PyramidBox|WIDER FACE数据集|人脸检测| +|[pyramidbox_lite_mobile](image/face_detection/pyramidbox_lite_mobile)|PyramidBox|WIDER FACE|face_detection| +|[pyramidbox_lite_mobile_mask](image/face_detection/pyramidbox_lite_mobile_mask)|PyramidBox|WIDER FACE|face_detection| +|[pyramidbox_lite_server_mask](image/face_detection/pyramidbox_lite_server_mask)|PyramidBox|WIDER FACE|face_detection| 
+|[ultra_light_fast_generic_face_detector_1mb_640](image/face_detection/ultra_light_fast_generic_face_detector_1mb_640)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE|face_detection| +|[ultra_light_fast_generic_face_detector_1mb_320](image/face_detection/ultra_light_fast_generic_face_detector_1mb_320)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE|face_detection| +|[pyramidbox_lite_server](image/face_detection/pyramidbox_lite_server)|PyramidBox|WIDER FACE|face_detection| +|[pyramidbox_face_detection](image/face_detection/pyramidbox_face_detection)|PyramidBox|WIDER FACE|face_detection| - ### Text Recognition From c40b9df0ef4c81f8f01a7a92bb1239f3e69c1a13 Mon Sep 17 00:00:00 2001 From: Lin Han Date: Thu, 15 Sep 2022 13:15:42 +0000 Subject: [PATCH 052/117] Fix typo (#2010) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix typo 这个地方多了一个 ( * fix(typo): excessive ( --- .../object_detection/faster_rcnn_resnet50_coco2017/README.md | 2 +- .../object_detection/faster_rcnn_resnet50_coco2017/README_en.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md index abf2bbeb0..2af450afe 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md @@ -59,7 +59,7 @@ object_detector = hub.Module(name="faster_rcnn_resnet50_coco2017") result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) # or - # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + # result = object_detector.object_detection(paths=['/PATH/TO/IMAGE']) ``` - ### 3、API diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md index 62ddf3ad3..aaa652df6 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md @@ -60,7 +60,7 @@ object_detector = hub.Module(name="faster_rcnn_resnet50_coco2017") result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) # or - # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + # result = object_detector.object_detection(paths=['/PATH/TO/IMAGE']) ``` - ### 3、API From 6a1af6f275db9a02468e9732134eee3a173adb98 Mon Sep 17 00:00:00 2001 From: Linjie Chen <40840292+linjieccc@users.noreply.github.com> Date: Fri, 16 Sep 2022 00:50:18 +0800 Subject: [PATCH 053/117] fix bug of model not found error (#2007) * Update to Auto Model * Update to Auto Model --- modules/text/language_model/rbt3/README.md | 8 +++- modules/text/language_model/rbt3/module.py | 47 ++++++++++--------- modules/text/language_model/rbtl3/README.md | 8 +++- modules/text/language_model/rbtl3/module.py | 47 ++++++++++--------- .../roberta-wwm-ext-large/README.md | 10 ++-- .../roberta-wwm-ext-large/module.py | 45 +++++++++--------- .../language_model/roberta-wwm-ext/README.md | 10 ++-- .../language_model/roberta-wwm-ext/module.py | 45 +++++++++--------- 8 files changed, 126 insertions(+), 94 deletions(-) diff --git a/modules/text/language_model/rbt3/README.md b/modules/text/language_model/rbt3/README.md index 560f78e26..641b5089b 100644 --- a/modules/text/language_model/rbt3/README.md +++ b/modules/text/language_model/rbt3/README.md @@ -1,5 +1,5 @@ 
```shell
-$ hub install rtb3==2.0.1
+$ hub install rtb3==2.0.2
```


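The hunks above and below carry the substance of this fix: the RoBERTa-specific classes are replaced by PaddleNLP's `Auto*` factories and the model name gains the `hfl/` prefix, which is what resolves the "model not found" error. A minimal sketch of the same loading path used directly through PaddleNLP (assumes a recent `paddlenlp` with the `Auto*` API; the sample sentence and `num_classes=2` are illustrative, not part of the patch):

```python
import paddle
from paddlenlp.transformers import AutoModelForSequenceClassification
from paddlenlp.transformers import AutoTokenizer

# 'hfl/rbt3' is the prefixed name the patched module.py resolves; the old
# bare 'rbt3' is what could no longer be found and raised the error.
tokenizer = AutoTokenizer.from_pretrained('hfl/rbt3')
model = AutoModelForSequenceClassification.from_pretrained('hfl/rbt3', num_classes=2)
model.eval()

encoded = tokenizer('这家餐厅的服务很好')  # illustrative input, not from the patch
input_ids = paddle.to_tensor([encoded['input_ids']])
token_type_ids = paddle.to_tensor([encoded['token_type_ids']])
logits = model(input_ids, token_type_ids=token_type_ids)
print(logits.shape)  # [1, 2]
```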
@@ -85,7 +85,7 @@ label_map = {0: 'negative', 1: 'positive'} model = hub.Module( name='rtb3', - version='2.0.1', + version='2.0.2', task='seq-cls', load_checkpoint='/path/to/parameters', label_map=label_map) @@ -163,3 +163,7 @@ paddlehub >= 2.0.0 * 2.0.1 增加文本匹配任务`text-matching` + +* 2.0.2 + + 更新预训练模型调用方法 diff --git a/modules/text/language_model/rbt3/module.py b/modules/text/language_model/rbt3/module.py index 1fdde350a..6ef8b7e03 100644 --- a/modules/text/language_model/rbt3/module.py +++ b/modules/text/language_model/rbt3/module.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict -import os import math +import os +from typing import Dict import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel -from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers import AutoModel +from paddlenlp.transformers import AutoModelForSequenceClassification +from paddlenlp.transformers import AutoModelForTokenClassification +from paddlenlp.transformers import AutoTokenizer + from paddlehub.module.module import moduleinfo from paddlehub.module.nlp_module import TransformerModule from paddlehub.utils.log import logger @@ -29,7 +31,7 @@ @moduleinfo( name="rbt3", - version="2.0.1", + version="2.0.2", summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters ", author="ymcui", author_email="ymcui@ir.hit.edu.cn", @@ -42,13 +44,13 @@ class Roberta(nn.Layer): """ def __init__( - self, - task: str = None, - load_checkpoint: str = None, - label_map: Dict = None, - num_classes: int = 2, - suffix: bool = False, - **kwargs, + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, ): super(Roberta, self).__init__() if label_map: @@ -63,23 +65,26 @@ def __init__( "current task name 'sequence_classification' was renamed to 'seq-cls', " "'sequence_classification' has been deprecated and will be removed in the future.", ) if task == 'seq-cls': - self.model = RobertaForSequenceClassification.from_pretrained( - pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', + num_classes=self.num_classes, + **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task == 'token-cls': - self.model = RobertaForTokenClassification.from_pretrained( - pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', + num_classes=self.num_classes, + **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) elif task == 'text-matching': - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', **kwargs) self.dropout 
= paddle.nn.Dropout(0.1) self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task is None: - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', **kwargs) else: raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) @@ -171,4 +176,4 @@ def get_tokenizer(*args, **kwargs): """ Gets the tokenizer that is customized for this module. """ - return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='rbt3', *args, **kwargs) + return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', *args, **kwargs) diff --git a/modules/text/language_model/rbtl3/README.md b/modules/text/language_model/rbtl3/README.md index c61df18d2..8bcda2905 100644 --- a/modules/text/language_model/rbtl3/README.md +++ b/modules/text/language_model/rbtl3/README.md @@ -1,5 +1,5 @@ ```shell -$ hub install rbtl3==2.0.1 +$ hub install rbtl3==2.0.2 ```


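The rbtl3 changes mirror the rbt3 ones: the module version is bumped to 2.0.2 and the pretrained weights are now resolved as `hfl/rbtl3` through the `Auto*` classes. A hedged end-to-end sketch of using the bumped module for sequence classification follows; the constructor arguments come from the patched README, while the sample sentences and the `predict()` call follow the usage pattern these module READMEs document elsewhere and are illustrative only (a fine-tuned checkpoint would normally be supplied via `load_checkpoint`):

```python
import paddlehub as hub

# Constructor mirrors the patched README; sentences below are made up.
label_map = {0: 'negative', 1: 'positive'}
model = hub.Module(
    name='rbtl3',
    version='2.0.2',
    task='seq-cls',
    label_map=label_map)

data = [['这家旅馆交通方便'], ['房间有些陈旧']]  # illustrative inputs
results = model.predict(data, max_seq_len=50, batch_size=1, use_gpu=False)
for text, label in zip(data, results):
    print(text[0], '->', label)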
@@ -85,7 +85,7 @@ label_map = {0: 'negative', 1: 'positive'} model = hub.Module( name='rbtl3', - version='2.0.1', + version='2.0.2', task='seq-cls', load_checkpoint='/path/to/parameters', label_map=label_map) @@ -163,3 +163,7 @@ paddlehub >= 2.0.0 * 2.0.1 增加文本匹配任务`text-matching` + +* 2.0.2 + + 更新预训练模型调用方法 diff --git a/modules/text/language_model/rbtl3/module.py b/modules/text/language_model/rbtl3/module.py index d5789099d..bab919f10 100644 --- a/modules/text/language_model/rbtl3/module.py +++ b/modules/text/language_model/rbtl3/module.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict -import os import math +import os +from typing import Dict import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel -from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers import AutoModel +from paddlenlp.transformers import AutoModelForSequenceClassification +from paddlenlp.transformers import AutoModelForTokenClassification +from paddlenlp.transformers import AutoTokenizer + from paddlehub.module.module import moduleinfo from paddlehub.module.nlp_module import TransformerModule from paddlehub.utils.log import logger @@ -29,7 +31,7 @@ @moduleinfo( name="rbtl3", - version="2.0.1", + version="2.0.2", summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters ", author="ymcui", author_email="ymcui@ir.hit.edu.cn", @@ -42,13 +44,13 @@ class Roberta(nn.Layer): """ def __init__( - self, - task: str = None, - load_checkpoint: str = None, - label_map: Dict = None, - num_classes: int = 2, - suffix: bool = False, - **kwargs, + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, ): super(Roberta, self).__init__() if label_map: @@ -63,23 +65,26 @@ def __init__( "current task name 'sequence_classification' was renamed to 'seq-cls', " "'sequence_classification' has been deprecated and will be removed in the future.", ) if task == 'seq-cls': - self.model = RobertaForSequenceClassification.from_pretrained( - pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', + num_classes=self.num_classes, + **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task == 'token-cls': - self.model = RobertaForTokenClassification.from_pretrained( - pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', + num_classes=self.num_classes, + **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) elif task == 'text-matching': - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', 
**kwargs) self.dropout = paddle.nn.Dropout(0.1) self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task is None: - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', **kwargs) else: raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) @@ -171,4 +176,4 @@ def get_tokenizer(*args, **kwargs): """ Gets the tokenizer that is customized for this module. """ - return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='rbtl3', *args, **kwargs) + return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', *args, **kwargs) diff --git a/modules/text/language_model/roberta-wwm-ext-large/README.md b/modules/text/language_model/roberta-wwm-ext-large/README.md index d5c5aa592..a08e62d7f 100644 --- a/modules/text/language_model/roberta-wwm-ext-large/README.md +++ b/modules/text/language_model/roberta-wwm-ext-large/README.md @@ -1,6 +1,6 @@ # roberta-wwm-ext-large |模型名称|roberta-wwm-ext-large| -| :--- | :---: | +| :--- | :---: | |类别|文本-语义模型| |网络|roberta-wwm-ext-large| |数据集|百度自建数据集| @@ -51,7 +51,7 @@ label_map = {0: 'negative', 1: 'positive'} model = hub.Module( name='roberta-wwm-ext-large', - version='2.0.2', + version='2.0.3', task='seq-cls', load_checkpoint='/path/to/parameters', label_map=label_map) @@ -181,6 +181,10 @@ for idx, text in enumerate(data): * 2.0.2 增加文本匹配任务`text-matching` + +* 2.0.3 + + 更新预训练模型调用方法 ```shell - $ hub install roberta-wwm-ext-large==2.0.2 + $ hub install roberta-wwm-ext==2.0.3 ``` diff --git a/modules/text/language_model/roberta-wwm-ext-large/module.py b/modules/text/language_model/roberta-wwm-ext-large/module.py index 13efb6aea..272df4425 100644 --- a/modules/text/language_model/roberta-wwm-ext-large/module.py +++ b/modules/text/language_model/roberta-wwm-ext-large/module.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict -import os import math +import os +from typing import Dict import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel -from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers import AutoModel +from paddlenlp.transformers import AutoModelForSequenceClassification +from paddlenlp.transformers import AutoModelForTokenClassification +from paddlenlp.transformers import AutoTokenizer + from paddlehub.module.module import moduleinfo from paddlehub.module.nlp_module import TransformerModule from paddlehub.utils.log import logger @@ -29,7 +31,7 @@ @moduleinfo( name="roberta-wwm-ext-large", - version="2.0.2", + version="2.0.3", summary= "chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters. 
The module is executed as paddle.dygraph.", author="ymcui", @@ -43,13 +45,13 @@ class Roberta(nn.Layer): """ def __init__( - self, - task: str = None, - load_checkpoint: str = None, - label_map: Dict = None, - num_classes: int = 2, - suffix: bool = False, - **kwargs, + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, ): super(Roberta, self).__init__() if label_map: @@ -64,23 +66,24 @@ def __init__( "current task name 'sequence_classification' was renamed to 'seq-cls', " "'sequence_classification' has been deprecated and will be removed in the future.", ) if task == 'seq-cls': - self.model = RobertaForSequenceClassification.from_pretrained( - pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task == 'token-cls': - self.model = RobertaForTokenClassification.from_pretrained( - pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForTokenClassification.from_pretrained( + pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) elif task == 'text-matching': - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', **kwargs) self.dropout = paddle.nn.Dropout(0.1) self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task is None: - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', **kwargs) else: raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) @@ -172,4 +175,4 @@ def get_tokenizer(*args, **kwargs): """ Gets the tokenizer that is customized for this module. 
""" - return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', *args, **kwargs) + return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', *args, **kwargs) diff --git a/modules/text/language_model/roberta-wwm-ext/README.md b/modules/text/language_model/roberta-wwm-ext/README.md index f052628f2..7bb502237 100644 --- a/modules/text/language_model/roberta-wwm-ext/README.md +++ b/modules/text/language_model/roberta-wwm-ext/README.md @@ -1,6 +1,6 @@ # roberta-wwm-ext |模型名称|roberta-wwm-ext| -| :--- | :---: | +| :--- | :---: | |类别|文本-语义模型| |网络|roberta-wwm-ext| |数据集|百度自建数据集| @@ -51,7 +51,7 @@ label_map = {0: 'negative', 1: 'positive'} model = hub.Module( name='roberta-wwm-ext', - version='2.0.2', + version='2.0.3', task='seq-cls', load_checkpoint='/path/to/parameters', label_map=label_map) @@ -181,6 +181,10 @@ for idx, text in enumerate(data): * 2.0.2 增加文本匹配任务`text-matching` + +* 2.0.3 + + 更新预训练模型调用方法 ```shell - $ hub install roberta-wwm-ext==2.0.2 + $ hub install roberta-wwm-ext==2.0.3 ``` diff --git a/modules/text/language_model/roberta-wwm-ext/module.py b/modules/text/language_model/roberta-wwm-ext/module.py index 66108a239..2fe144315 100644 --- a/modules/text/language_model/roberta-wwm-ext/module.py +++ b/modules/text/language_model/roberta-wwm-ext/module.py @@ -11,17 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict -import os import math +import os +from typing import Dict import paddle import paddle.nn as nn import paddle.nn.functional as F - -from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel -from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer from paddlenlp.metrics import ChunkEvaluator +from paddlenlp.transformers import AutoModel +from paddlenlp.transformers import AutoModelForSequenceClassification +from paddlenlp.transformers import AutoModelForTokenClassification +from paddlenlp.transformers import AutoTokenizer + from paddlehub.module.module import moduleinfo from paddlehub.module.nlp_module import TransformerModule from paddlehub.utils.log import logger @@ -29,7 +31,7 @@ @moduleinfo( name="roberta-wwm-ext", - version="2.0.2", + version="2.0.3", summary= "chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters. 
The module is executed as paddle.dygraph.", author="ymcui", @@ -43,13 +45,13 @@ class Roberta(nn.Layer): """ def __init__( - self, - task: str = None, - load_checkpoint: str = None, - label_map: Dict = None, - num_classes: int = 2, - suffix: bool = False, - **kwargs, + self, + task: str = None, + load_checkpoint: str = None, + label_map: Dict = None, + num_classes: int = 2, + suffix: bool = False, + **kwargs, ): super(Roberta, self).__init__() if label_map: @@ -64,23 +66,24 @@ def __init__( "current task name 'sequence_classification' was renamed to 'seq-cls', " "'sequence_classification' has been deprecated and will be removed in the future.", ) if task == 'seq-cls': - self.model = RobertaForSequenceClassification.from_pretrained( - pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForSequenceClassification.from_pretrained( + pretrained_model_name_or_path='hfl/roberta-wwm-ext', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task == 'token-cls': - self.model = RobertaForTokenClassification.from_pretrained( - pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs) + self.model = AutoModelForTokenClassification.from_pretrained( + pretrained_model_name_or_path='hfl/roberta-wwm-ext', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], + suffix=suffix) elif task == 'text-matching': - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', **kwargs) self.dropout = paddle.nn.Dropout(0.1) self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2) self.criterion = paddle.nn.loss.CrossEntropyLoss() self.metric = paddle.metric.Accuracy() elif task is None: - self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs) + self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', **kwargs) else: raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported)) @@ -172,4 +175,4 @@ def get_tokenizer(*args, **kwargs): """ Gets the tokenizer that is customized for this module. 
""" - return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', *args, **kwargs) + return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', *args, **kwargs) From 196f7e6739d57b5aeaf598a6cae8f396bb205d04 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 10:28:00 +0800 Subject: [PATCH 054/117] update faster_rcnn_resnet50_coco2017 (#1947) * update faster_rcnn_resnet50_coco2017 * update unittest * update unittest * update unittest * update gpu config * update * add clean func * update save inference model Co-authored-by: wuzewu Co-authored-by: chenjian --- .../faster_rcnn_resnet50_coco2017/README.md | 17 +- .../README_en.md | 17 +- .../bbox_assigner.py | 19 - .../bbox_head.py | 269 ----------- .../data_feed.py | 3 - .../faster_rcnn_resnet50_coco2017/module.py | 289 ++--------- .../name_adapter.py | 61 --- .../nonlocal_helper.py | 154 ------ .../processor.py | 7 +- .../faster_rcnn_resnet50_coco2017/resnet.py | 447 ------------------ .../roi_extractor.py | 13 - .../faster_rcnn_resnet50_coco2017/rpn_head.py | 302 ------------ .../faster_rcnn_resnet50_coco2017/test.py | 108 +++++ 13 files changed, 160 insertions(+), 1546 deletions(-) delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py create mode 100644 modules/image/object_detection/faster_rcnn_resnet50_coco2017/test.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md index 2af450afe..0db5a24ee 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md @@ -102,19 +102,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
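With the fluid-based saving logic removed, `save_inference_model` keeps only the save path, as the replacement parameter list above shows. A short sketch of the simplified call (the output directory name is a placeholder):

```python
import paddlehub as hub

# Only the save path remains; model/params filenames and the combined switch
# were dropped together with the fluid saving code.
module = hub.Module(name="faster_rcnn_resnet50_coco2017")
module.save_inference_model(dirname="./inference/faster_rcnn_resnet50_coco2017")  # placeholder path
```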
## 四、服务部署 @@ -167,6 +161,11 @@ * 1.1.1 修复numpy数据读取问题 + +* 1.2.0 + + 移除 fluid api + - ```shell - $ hub install faster_rcnn_resnet50_coco2017==1.1.1 + $ hub install faster_rcnn_resnet50_coco2017==1.2.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md index aaa652df6..35814624e 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README_en.md @@ -103,19 +103,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -168,6 +162,11 @@ * 1.1.1 Fix the problem of reading numpy + +* 1.2.0 + + Remove fluid api + - ```shell - $ hub install faster_rcnn_resnet50_coco2017==1.1.1 + $ hub install faster_rcnn_resnet50_coco2017==1.2.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py deleted file mode 100644 index bcb6b42d1..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py +++ /dev/null @@ -1,19 +0,0 @@ -class BBoxAssigner(object): - def __init__(self, - batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=.5, - bg_thresh_hi=.5, - bg_thresh_lo=0., - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=81, - shuffle_before_sample=True): - super(BBoxAssigner, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.class_nums = class_nums - self.use_random = shuffle_before_sample diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py deleted file mode 100644 index 7f72bb939..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py +++ /dev/null @@ -1,269 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma (float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, 
self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. 
- """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. - P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py index e52cce168..d2fc1de7a 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py @@ -4,12 +4,9 @@ from __future__ import division import os -from collections import OrderedDict import cv2 import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid __all__ = ['test_reader'] diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py index 7b26eab37..5161b6628 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py @@ -6,40 +6,31 @@ import os import ast import argparse -from collections import OrderedDict -from functools import partial from math import ceil +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub +import paddle.static from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5 -from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead -from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss -from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign +from paddle.inference import Config, create_predictor +from paddlehub.utils.parser import txt_parser +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import test_reader, padding_minibatch @moduleinfo( name="faster_rcnn_resnet50_coco2017", - version="1.1.1", + version="1.2.0", type="cv/object_detection", summary= "Baidu's Faster R-CNN model for object detection with 
backbone ResNet50, trained with dataset COCO2017", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50(hub.Module): - def _initialize(self): +class FasterRCNNResNet50: + def __init__(self): # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_model") + self.directory, "faster_rcnn_resnet50_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -48,10 +39,12 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -60,236 +53,14 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of categories - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=4, - freeze_at=2) - body_feats = backbone(image) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - body_feat = body_feats[body_feat_names[-1]] - # roi_extractor: RoIAlign - roi_extractor = self.roi_extractor() - roi_feat = fluid.layers.roi_align( - input=body_feat, - rois=rois, - pooled_height=roi_extractor.pooled_height, - pooled_width=roi_extractor.pooled_width, - spatial_scale=roi_extractor.spatial_scale, - sampling_ratio=roi_extractor.sampling_ratio) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for 
key, value in outputs.items() - } - - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return RPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=12000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=6000, - pre_nms_top_n=1000)) - - def roi_extractor(self): - return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625) - - def bbox_head(self, num_classes): - return BBoxHead( - head=ResNetC5(depth=50, norm_type='affine_channel'), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - bbox_loss=SmoothL1Loss(), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, images=None, - data=None, use_gpu=False, batch_size=1, output_dir='detection_result', @@ -326,8 +97,6 @@ def object_detection(self, "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." 
) paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] all_images = list() for yield_return in test_reader(paths, images): @@ -347,20 +116,28 @@ def object_detection(self, padding_image, padding_info, padding_shape = padding_minibatch( batch_data) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - padding_shape_tensor = PaddleTensor(padding_shape.copy()) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + feed_list = [ - padding_image_tensor, padding_info_tensor, padding_shape_tensor + padding_image, padding_info, padding_shape ] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) + + input_names = predictor.get_input_names() + + for i, input_name in enumerate(input_names): + data = np.asarray(feed_list[i], dtype=np.float32) + handle = predictor.get_input_handle(input_name) + handle.copy_from_cpu(data) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdee..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa0..000000000 --- 
a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. 
g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py index 2b3e1ce9c..fd31a14e0 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py @@ -107,7 +107,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. @@ -130,9 +130,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. 
""" - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, 
- param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, 
ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. 
- """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py deleted file mode 100644 index 0241d1274..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py +++ /dev/null @@ -1,13 +0,0 @@ -# coding=utf-8 -__all__ = ['RoIAlign'] - - -class 
RoIAlign(object): - def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0): - super(RoIAlign, self).__init__() - if isinstance(resolution, int): - resolution = [resolution, resolution] - self.pooled_height = resolution[0] - self.pooled_width = resolution[1] - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py deleted file mode 100644 index 7acdf083f..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py +++ /dev/null @@ -1,302 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead'] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. 
- """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. 
- body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. 
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} diff --git a/modules/image/object_detection/faster_rcnn_resnet50_coco2017/test.py b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/test.py new file mode 100644 index 000000000..d1d5d92fa --- /dev/null +++ b/modules/image/object_detection/faster_rcnn_resnet50_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="faster_rcnn_resnet50_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 3fcdd7781ed0a4ecd45f3b25b32e0e5f387f4d76 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Sep 2022 11:44:02 +0800 Subject: [PATCH 055/117] Fix save_inference_model bug in paddlehub --- paddlehub/module/module.py | 105 ++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py index d494eb393..a9d10ce02 100644 --- a/paddlehub/module/module.py +++ b/paddlehub/module/module.py @@ -37,7 +37,6 @@ class InvalidHubModule(Exception): - def __init__(self, directory: str): self.directory = directory @@ -200,11 +199,12 @@ def save_inference_model(self, for key, _sub_module in self.sub_modules().items(): try: sub_dirname = os.path.normpath(os.path.join(dirname, key)) - _sub_module.save_inference_model(sub_dirname, - include_sub_modules=include_sub_modules, - model_filename=model_filename, - params_filename=params_filename, - combined=combined) + _sub_module.save_inference_model( + sub_dirname, + include_sub_modules=include_sub_modules, + model_filename=model_filename, + params_filename=params_filename, + combined=combined) except: 
utils.record_exception('Failed to save sub module {}'.format(_sub_module.name)) @@ -231,14 +231,11 @@ def save_inference_model(self, if not self._pretrained_model_path: raise RuntimeError('Module {} does not support exporting models in Paddle Inference format.'.format( self.name)) - elif not os.path.exists(self._pretrained_model_path): + elif not os.path.exists( + self._pretrained_model_path) and not os.path.exists(self._pretrained_model_path + '.pdmodel'): log.logger.warning('The model path of Module {} does not exist.'.format(self.name)) return - model_filename = '__model__' if not model_filename else model_filename - if combined: - params_filename = '__params__' if not params_filename else params_filename - place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -253,21 +250,25 @@ def save_inference_model(self, if os.path.exists(os.path.join(self._pretrained_model_path, '__params__')): _params_filename = '__params__' + if _model_filename is not None and _params_filename is not None: + program, feeded_var_names, target_vars = paddle.static.load_inference_model( + self._pretrained_model_path, + executor=exe, + model_filename=_model_filename, + params_filename=_params_filename, + ) + else: + program, feeded_var_names, target_vars = paddle.static.load_inference_model( + self._pretrained_model_path, executor=exe) - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self._pretrained_model_path, - executor=exe, - model_filename=_model_filename, - params_filename=_params_filename, - ) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + global_block = program.global_block() + feed_vars = [global_block.var(item) for item in feeded_var_names] + + path_prefix = dirname + if os.path.isdir(dirname): + path_prefix = os.path.join(dirname, 'model') + paddle.static.save_inference_model( + path_prefix, feed_vars=feed_vars, fetch_vars=target_vars, executor=exe, program=program) log.logger.info('Paddle Inference model saved in {}.'.format(dirname)) @@ -337,17 +338,19 @@ def export_onnx_model(self, save_file = os.path.join(dirname, '{}.onnx'.format(self.name)) - program, inputs, outputs = paddle.static.load_inference_model(dirname=self._pretrained_model_path, - model_filename=model_filename, - params_filename=params_filename, - executor=exe) + program, inputs, outputs = paddle.static.load_inference_model( + dirname=self._pretrained_model_path, + model_filename=model_filename, + params_filename=params_filename, + executor=exe) - paddle2onnx.program2onnx(program=program, - scope=paddle.static.global_scope(), - feed_var_names=inputs, - target_vars=outputs, - save_file=save_file, - **kwargs) + paddle2onnx.program2onnx( + program=program, + scope=paddle.static.global_scope(), + feed_var_names=inputs, + target_vars=outputs, + save_file=save_file, + **kwargs) class Module(object): @@ -387,13 +390,14 @@ def __new__(cls, from paddlehub.server.server import CacheUpdater # This branch come from hub.Module(name='xxx') or hub.Module(directory='xxx') if name: - module = cls.init_with_name(name=name, - version=version, - source=source, - update=update, - branch=branch, - ignore_env_mismatch=ignore_env_mismatch, - **kwargs) + module = cls.init_with_name( + name=name, + version=version, + source=source, + update=update, + branch=branch, + ignore_env_mismatch=ignore_env_mismatch, + **kwargs) 
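The save_inference_model fix in this commit hinges on pairing the Paddle 2.x static-graph APIs: `paddle.static.load_inference_model` returns a `Program` together with feed variable *names*, while `paddle.static.save_inference_model` expects a path prefix and feed *Variables*, so the names are resolved through the program's global block before re-saving. A minimal standalone sketch of that pairing, with `legacy_dir` and `export_dir` as hypothetical paths:

```python
import os
import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# Load an old-format inference model (a directory holding __model__/__params__).
program, feeded_var_names, target_vars = paddle.static.load_inference_model(
    'legacy_dir', executor=exe,
    model_filename='__model__', params_filename='__params__')

# The 2.x save API takes feed Variables rather than variable names.
feed_vars = [program.global_block().var(name) for name in feeded_var_names]

# Re-export under a path prefix; this produces model.pdmodel and model.pdiparams.
paddle.static.save_inference_model(
    os.path.join('export_dir', 'model'),
    feed_vars=feed_vars,
    fetch_vars=target_vars,
    executor=exe,
    program=program)
```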
CacheUpdater("update_cache", module=name, version=version).start() elif directory: module = cls.init_with_directory(directory=directory, **kwargs) @@ -485,12 +489,13 @@ def init_with_name(cls, manager = LocalModuleManager() user_module_cls = manager.search(name, source=source, branch=branch) if not user_module_cls or not user_module_cls.version.match(version): - user_module_cls = manager.install(name=name, - version=version, - source=source, - update=update, - branch=branch, - ignore_env_mismatch=ignore_env_mismatch) + user_module_cls = manager.install( + name=name, + version=version, + source=source, + update=update, + branch=branch, + ignore_env_mismatch=ignore_env_mismatch) directory = manager._get_normalized_path(user_module_cls.name) @@ -555,7 +560,9 @@ def _wrapper(cls: Generic) -> Generic: _bases.append(_b) _bases.append(_meta) _bases = tuple(_bases) - wrap_cls = builtins.type(cls.__name__, _bases, dict(cls.__dict__)) + attr_dict = dict(cls.__dict__) + attr_dict.pop('__dict__', None) + wrap_cls = builtins.type(cls.__name__, _bases, attr_dict) wrap_cls.name = name wrap_cls.version = utils.Version(version) From 01002e40f93ed8d59c2e96cb9f4d7d32bbd87f99 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 14:27:14 +0800 Subject: [PATCH 056/117] update faster_rcnn_resnet50_fpn_coco2017 (#1948) * update faster_rcnn_resnet50_fpn_coco2017 * update unittest faster_rcnn_resnet50_fpn_coco2017 * update unittest * update unittest * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../README.md | 17 +- .../README_en.md | 17 +- .../bbox_assigner.py | 20 - .../bbox_head.py | 270 --------- .../data_feed.py | 3 - .../faster_rcnn_resnet50_fpn_coco2017/fpn.py | 296 ---------- .../module.py | 302 ++-------- .../name_adapter.py | 61 -- .../nonlocal_helper.py | 154 ----- .../processor.py | 8 +- .../resnet.py | 447 --------------- .../roi_extractor.py | 76 --- .../rpn_head.py | 533 ------------------ .../faster_rcnn_resnet50_fpn_coco2017/test.py | 108 ++++ 14 files changed, 162 insertions(+), 2150 deletions(-) delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py delete mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py create mode 100644 modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md index 9d003b800..ef5324a7b 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md @@ -102,19 +102,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - 
dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
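With the simplified signature documented above, exporting the module for Paddle Inference takes a single path argument. A brief usage sketch; the save path is illustrative, and the expected output files follow the pattern exercised by the new unit tests:

```python
import paddlehub as hub

module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")

# Export to Paddle Inference format; only the save path is required now.
module.save_inference_model('./inference/model')

# Expected artifacts: ./inference/model.pdmodel and ./inference/model.pdiparams
```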
## 四、服务部署 @@ -167,6 +161,11 @@ * 1.0.1 修复numpy数据读取问题 + +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1 + $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md index d90beb649..bf4c7274d 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README_en.md @@ -101,19 +101,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -166,6 +160,11 @@ * 1.0.1 Fix the problem of reading numpy + +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.1 + $ hub install faster_rcnn_resnet50_fpn_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py deleted file mode 100644 index d033382c4..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py +++ /dev/null @@ -1,20 +0,0 @@ -class BBoxAssigner(object): - # __op__ = fluid.layers.generate_proposal_labels - def __init__(self, - batch_size_per_im=512, - fg_fraction=.25, - fg_thresh=.5, - bg_thresh_hi=.5, - bg_thresh_lo=0., - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - class_nums=81, - shuffle_before_sample=True): - super(BBoxAssigner, self).__init__() - self.batch_size_per_im = batch_size_per_im - self.fg_fraction = fg_fraction - self.fg_thresh = fg_thresh - self.bg_thresh_hi = bg_thresh_hi - self.bg_thresh_lo = bg_thresh_lo - self.bbox_reg_weights = bbox_reg_weights - self.class_nums = class_nums - self.use_random = shuffle_before_sample diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py deleted file mode 100644 index 8080ed22f..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py +++ /dev/null @@ -1,270 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Xavier -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import MSRA - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, - score_threshold=.05, - nms_top_k=-1, - keep_top_k=100, - nms_threshold=.5, - normalized=False, - nms_eta=1.0, - background_label=0): - super(MultiClassNMS, self).__init__() - self.score_threshold = score_threshold - self.nms_top_k = nms_top_k - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.normalized = normalized - self.nms_eta = nms_eta - self.background_label = background_label - - -class SmoothL1Loss(object): - ''' - Smooth L1 loss - Args: - sigma 
(float): hyper param in smooth l1 loss - ''' - - def __init__(self, sigma=1.0): - super(SmoothL1Loss, self).__init__() - self.sigma = sigma - - def __call__(self, x, y, inside_weight=None, outside_weight=None): - return fluid.layers.smooth_l1( - x, - y, - inside_weight=inside_weight, - outside_weight=outside_weight, - sigma=self.sigma) - - -class BoxCoder(object): - def __init__(self, - prior_box_var=[0.1, 0.1, 0.2, 0.2], - code_type='decode_center_size', - box_normalized=False, - axis=1): - super(BoxCoder, self).__init__() - self.prior_box_var = prior_box_var - self.code_type = code_type - self.box_normalized = box_normalized - self.axis = axis - - -class TwoFCHead(object): - """ - RCNN head with two Fully Connected layers - - Args: - mlp_dim (int): num of filters for the fc layers - """ - - def __init__(self, mlp_dim=1024): - super(TwoFCHead, self).__init__() - self.mlp_dim = mlp_dim - - def __call__(self, roi_feat): - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - - fc6 = fluid.layers.fc( - input=roi_feat, - size=self.mlp_dim, - act='relu', - name='fc6', - param_attr=ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))) - head_feat = fluid.layers.fc( - input=fc6, - size=self.mlp_dim, - act='relu', - name='fc7', - param_attr=ParamAttr(name='fc7_w', initializer=Xavier()), - bias_attr=ParamAttr( - name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))) - - return head_feat - - -class BBoxHead(object): - """ - RCNN bbox head - - Args: - head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead` - box_coder (object): `BoxCoder` instance - nms (object): `MultiClassNMS` instance - num_classes: number of output classes - """ - __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss'] - __shared__ = ['num_classes'] - - def __init__(self, - head, - box_coder=BoxCoder(), - nms=MultiClassNMS(), - bbox_loss=SmoothL1Loss(), - num_classes=81): - super(BBoxHead, self).__init__() - self.head = head - self.num_classes = num_classes - self.box_coder = box_coder - self.nms = nms - self.bbox_loss = bbox_loss - self.head_feat = None - - def get_head_feat(self, input=None): - """ - Get the bbox head feature map. - """ - - if input is not None: - feat = self.head(input) - if isinstance(feat, OrderedDict): - feat = list(feat.values())[0] - self.head_feat = feat - return self.head_feat - - def _get_output(self, roi_feat): - """ - Get bbox head output. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - - Returns: - cls_score(Variable): Output of rpn head with shape of - [N, num_anchors, H, W]. - bbox_pred(Variable): Output of rpn head with shape of - [N, num_anchors * 4, H, W]. 
- """ - head_feat = self.get_head_feat(roi_feat) - # when ResNetC5 output a single feature map - if not isinstance(self.head, TwoFCHead): - head_feat = fluid.layers.pool2d( - head_feat, pool_type='avg', global_pooling=True) - cls_score = fluid.layers.fc( - input=head_feat, - size=self.num_classes, - act=None, - name='cls_score', - param_attr=ParamAttr( - name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred = fluid.layers.fc( - input=head_feat, - size=4 * self.num_classes, - act=None, - name='bbox_pred', - param_attr=ParamAttr( - name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))) - return cls_score, bbox_pred - - def get_loss(self, roi_feat, labels_int32, bbox_targets, - bbox_inside_weights, bbox_outside_weights): - """ - Get bbox_head loss. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - labels_int32(Variable): Class label of a RoI with shape [P, 1]. - P is the number of RoI. - bbox_targets(Variable): Box label of a RoI with shape - [P, 4 * class_nums]. - bbox_inside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - bbox_outside_weights(Variable): Indicates whether a box should - contribute to loss. Same shape as bbox_targets. - - Return: - Type: Dict - loss_cls(Variable): bbox_head loss. - loss_bbox(Variable): bbox_head loss. - """ - - cls_score, bbox_pred = self._get_output(roi_feat) - - labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64') - labels_int64.stop_gradient = True - loss_cls = fluid.layers.softmax_with_cross_entropy( - logits=cls_score, label=labels_int64, numeric_stable_mode=True) - loss_cls = fluid.layers.reduce_mean(loss_cls) - loss_bbox = self.bbox_loss( - x=bbox_pred, - y=bbox_targets, - inside_weight=bbox_inside_weights, - outside_weight=bbox_outside_weights) - loss_bbox = fluid.layers.reduce_mean(loss_bbox) - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} - - def get_prediction(self, - roi_feat, - rois, - im_info, - im_shape, - return_box_score=False): - """ - Get prediction bounding box in test stage. - - Args: - roi_feat (Variable): RoI feature from RoIExtractor. - rois (Variable): Output of generate_proposals in rpn head. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - im_shape (Variable): Actual shape of original image with shape - [B, 3]. B is the number of images, each element consists of - original_height, original_width, 1 - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. 
- """ - cls_score, bbox_pred = self._get_output(roi_feat) - - im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) - im_scale = fluid.layers.sequence_expand(im_scale, rois) - boxes = rois / im_scale - cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False) - bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4)) - # self.box_coder - decoded_box = fluid.layers.box_coder( - prior_box=boxes, - target_box=bbox_pred, - prior_box_var=self.box_coder.prior_box_var, - code_type=self.box_coder.code_type, - box_normalized=self.box_coder.box_normalized, - axis=self.box_coder.axis) - cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape) - if return_box_score: - return {'bbox': cliped_box, 'score': cls_prob} - # self.nms - pred_result = fluid.layers.multiclass_nms( - bboxes=cliped_box, - scores=cls_prob, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - normalized=self.nms.normalized, - nms_eta=self.nms.nms_eta, - background_label=self.nms.background_label) - return pred_result diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py index b38501e5b..c9e52d54c 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py @@ -4,12 +4,9 @@ from __future__ import division import os -from collections import OrderedDict import cv2 import numpy as np -from PIL import Image, ImageEnhance -from paddle import fluid __all__ = ['test_reader'] diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py deleted file mode 100644 index bd19c712e..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/fpn.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['ConvNorm', 'FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, - name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr( - name=name + "_weights", - initializer=initializer, - learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - - norm_lr = 0. if freeze_norm else 1. - pattr = ParamAttr( - name=norm_name + '_scale', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=norm_name + '_offset', - learning_rate=norm_lr * lr_scale, - regularizer=L2Decay(norm_decay)) - - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - groups=norm_groups, - param_attr=pattr, - bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. 
/ 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=lateral_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr( - name=fpn_inner_name + "_w", - initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_inner_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral( - body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - 
self.min_level == len( - spatial_scale): - body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], - 1, - 'max', - pool_stride=2, - name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr( - name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr( - name=fpn_name + "_b", - learning_rate=2., - regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py index b8dd5afa4..650491894 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py @@ -6,41 +6,32 @@ import os import ast import argparse -from collections import OrderedDict -from functools import partial from math import ceil +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub +import paddle.jit +import paddle.static from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch -from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN -from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet -from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead -from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner -from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign +from paddle.inference import Config, create_predictor +from paddlehub.utils.parser import txt_parser +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import test_reader, padding_minibatch @moduleinfo( name="faster_rcnn_resnet50_fpn_coco2017", - version="1.0.1", + version="1.1.0", type="cv/object_detection", summary= "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class FasterRCNNResNet50RPN(hub.Module): - def 
_initialize(self): +class FasterRCNNResNet50RPN: + def __init__(self): # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] self.default_pretrained_model_path = os.path.join( - self.directory, "faster_rcnn_resnet50_fpn_model") + self.directory, "faster_rcnn_resnet50_fpn_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -49,10 +40,12 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -61,245 +54,14 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, - num_classes=81, - trainable=True, - pretrained=True, - phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of categories - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs (dict): the input variables. - outputs (dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=[-1, 3, -1, -1], dtype='float32') - # backbone - backbone = ResNet( - norm_type='affine_channel', - depth=50, - feature_maps=[2, 3, 4, 5], - freeze_at=2) - body_feats = backbone(image) - # fpn - fpn = FPN( - max_level=6, - min_level=2, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) - var_prefix = '@HUB_{}@'.format(self.name) - im_info = fluid.layers.data( - name='im_info', shape=[3], dtype='float32', lod_level=0) - im_shape = fluid.layers.data( - name='im_shape', shape=[3], dtype='float32', lod_level=0) - body_feat_names = list(body_feats.keys()) - body_feats, spatial_scale = fpn.get_output(body_feats) - # rpn_head: RPNHead - rpn_head = self.rpn_head() - rois = rpn_head.get_proposals(body_feats, im_info, mode=phase) - # train - if phase == 'train': - gt_bbox = fluid.layers.data( - name='gt_bbox', shape=[4], dtype='float32', lod_level=1) - is_crowd = fluid.layers.data( - name='is_crowd', shape=[1], dtype='int32', lod_level=1) - gt_class = fluid.layers.data( - name='gt_class', shape=[1], dtype='int32', lod_level=1) - rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd) - # bbox_assigner: BBoxAssigner - bbox_assigner = self.bbox_assigner(num_classes) - outs = fluid.layers.generate_proposal_labels( - rpn_rois=rois, - gt_classes=gt_class, - is_crowd=is_crowd, - gt_boxes=gt_bbox, - im_info=im_info, - batch_size_per_im=bbox_assigner.batch_size_per_im, - fg_fraction=bbox_assigner.fg_fraction, - fg_thresh=bbox_assigner.fg_thresh, - bg_thresh_hi=bbox_assigner.bg_thresh_hi, - bg_thresh_lo=bbox_assigner.bg_thresh_lo, - bbox_reg_weights=bbox_assigner.bbox_reg_weights, - class_nums=bbox_assigner.class_nums, - use_random=bbox_assigner.use_random) - rois = outs[0] - - roi_extractor = self.roi_extractor() - roi_feat = roi_extractor( - head_inputs=body_feats, - rois=rois, - spatial_scale=spatial_scale) - # head_feat - bbox_head = self.bbox_head(num_classes) - head_feat = bbox_head.head(roi_feat) - if isinstance(head_feat, OrderedDict): - head_feat = list(head_feat.values())[0] - if phase == 'train': - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name, - 'gt_class': var_prefix + gt_class.name, - 'gt_bbox': var_prefix + gt_bbox.name, - 'is_crowd': var_prefix + is_crowd.name - } - outputs = { - 'head_features': - var_prefix + head_feat.name, - 'rpn_cls_loss': - var_prefix + rpn_loss['rpn_cls_loss'].name, - 'rpn_reg_loss': - var_prefix + rpn_loss['rpn_reg_loss'].name, - 'generate_proposal_labels': - [var_prefix + var.name for var in outs] - } - elif phase == 'predict': - pred = bbox_head.get_prediction(roi_feat, rois, im_info, - im_shape) - inputs = { - 'image': var_prefix + image.name, - 'im_info': var_prefix + im_info.name, - 'im_shape': var_prefix + im_shape.name - } - outputs = { - 'head_features': var_prefix + head_feat.name, - 'rois': var_prefix + rois.name, - 'bbox_out': var_prefix + pred.name - } - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - - global_vars = context_prog.global_block().vars - inputs = { - key: global_vars[value] - for key, value in inputs.items() - } - outputs = { - key: global_vars[value] if not isinstance(value, list) else - [global_vars[var] for var in value] - for key, value in outputs.items() - } - - for param in 
context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - if pretrained: - - def _if_exist(var): - if num_classes != 81: - if 'bbox_pred' in var.name or 'cls_score' in var.name: - return False - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - return inputs, outputs, context_prog - - def rpn_head(self): - return FPNRPNHead( - anchor_generator=AnchorGenerator( - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1.0, 2.0], - stride=[16.0, 16.0], - variance=[1.0, 1.0, 1.0, 1.0]), - rpn_target_assign=RPNTargetAssign( - rpn_batch_size_per_im=256, - rpn_fg_fraction=0.5, - rpn_negative_overlap=0.3, - rpn_positive_overlap=0.7, - rpn_straddle_thresh=0.0), - train_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=2000, - pre_nms_top_n=2000), - test_proposal=GenerateProposals( - min_size=0.0, - nms_thresh=0.7, - post_nms_top_n=1000, - pre_nms_top_n=1000), - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6) - - def roi_extractor(self): - return FPNRoIAlign( - canconical_level=4, - canonical_size=224, - max_level=5, - min_level=2, - box_resolution=7, - sampling_ratio=2) - - def bbox_head(self, num_classes): - return BBoxHead( - head=TwoFCHead(mlp_dim=1024), - nms=MultiClassNMS( - keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), - num_classes=num_classes) - - def bbox_assigner(self, num_classes): - return BBoxAssigner( - batch_size_per_im=512, - bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], - bg_thresh_hi=0.5, - bg_thresh_lo=0.0, - fg_fraction=0.25, - fg_thresh=0.5, - class_nums=num_classes) - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, images=None, - data=None, use_gpu=False, batch_size=1, output_dir='detection_result', @@ -337,8 +99,6 @@ def object_detection(self, ) paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] all_images = list() for yield_data in test_reader(paths, images): @@ -360,29 +120,37 @@ def object_detection(self, padding_image, padding_info, padding_shape = padding_minibatch( batch_data, coarsest_stride=32, use_padded_im_info=True) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - padding_shape_tensor = PaddleTensor(padding_shape.copy()) feed_list = [ - padding_image_tensor, padding_info_tensor, padding_shape_tensor + padding_image, padding_info, padding_shape ] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + + feed_list = [ 
+ padding_image, padding_info, padding_shape + ] + + input_names = predictor.get_input_names() + + for i, input_name in enumerate(input_names): + data = np.asarray(feed_list[i], dtype=np.float32) + handle = predictor.get_input_handle(input_name) + handle.copy_from_cpu(data) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, handle_id=handle_id, visualization=visualization) res += output - return res def add_module_config_arg(self): diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdee..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa0..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! 
- "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. 
(8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py index 2b3e1ce9c..f15245643 100644 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py +++ b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py @@ -12,7 +12,6 @@ 'postprocess', ] - def base64_to_cv2(b64str): data = base64.b64decode(b64str.encode('utf8')) data = np.fromstring(data, np.uint8) @@ -107,7 +106,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. @@ -130,9 +129,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. 
""" - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. - freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - 
padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return 
self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. 
- """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py deleted file mode 100644 index 6e3398d8c..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding=utf-8 -import paddle.fluid as fluid - 
-__all__ = ['FPNRoIAlign'] - - -class FPNRoIAlign(object): - """ - RoI align pooling for FPN feature maps - Args: - sampling_ratio (int): number of sampling points - min_level (int): lowest level of FPN layer - max_level (int): highest level of FPN layer - canconical_level (int): the canconical FPN feature map level - canonical_size (int): the canconical FPN feature map size - box_resolution (int): box resolution - mask_resolution (int): mask roi resolution - """ - - def __init__(self, - sampling_ratio=0, - min_level=2, - max_level=5, - canconical_level=4, - canonical_size=224, - box_resolution=7, - mask_resolution=14): - super(FPNRoIAlign, self).__init__() - self.sampling_ratio = sampling_ratio - self.min_level = min_level - self.max_level = max_level - self.canconical_level = canconical_level - self.canonical_size = canonical_size - self.box_resolution = box_resolution - self.mask_resolution = mask_resolution - - def __call__(self, head_inputs, rois, spatial_scale, is_mask=False): - """ - Adopt RoI align onto several level of feature maps to get RoI features. - Distribute RoIs to different levels by area and get a list of RoI - features by distributed RoIs and their corresponding feature maps. - - Returns: - roi_feat(Variable): RoI features with shape of [M, C, R, R], - where M is the number of RoIs and R is RoI resolution - - """ - k_min = self.min_level - k_max = self.max_level - num_roi_lvls = k_max - k_min + 1 - name_list = list(head_inputs.keys()) - input_name_list = name_list[-num_roi_lvls:] - spatial_scale = spatial_scale[-num_roi_lvls:] - rois_dist, restore_index = fluid.layers.distribute_fpn_proposals( - rois, k_min, k_max, self.canconical_level, self.canonical_size) - # rois_dist is in ascend order - roi_out_list = [] - resolution = is_mask and self.mask_resolution or self.box_resolution - for lvl in range(num_roi_lvls): - name_index = num_roi_lvls - lvl - 1 - rois_input = rois_dist[lvl] - head_input = head_inputs[input_name_list[name_index]] - sc = spatial_scale[name_index] - roi_out = fluid.layers.roi_align( - input=head_input, - rois=rois_input, - pooled_height=resolution, - pooled_width=resolution, - spatial_scale=sc, - sampling_ratio=self.sampling_ratio) - roi_out_list.append(roi_out) - roi_feat_shuffle = fluid.layers.concat(roi_out_list) - roi_feat_ = fluid.layers.gather(roi_feat_shuffle, restore_index) - roi_feat = fluid.layers.lod_reset(roi_feat_, rois) - - return roi_feat diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py deleted file mode 100644 index e1b69866d..000000000 --- a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py +++ /dev/null @@ -1,533 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal -from paddle.fluid.regularizer import L2Decay - -__all__ = [ - 'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead', - 'FPNRPNHead' -] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - super(AnchorGenerator, self).__init__() - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = 
stride - - -class RPNTargetAssign(object): - # __op__ = fluid.layers.rpn_target_assign - def __init__(self, - rpn_batch_size_per_im=256, - rpn_straddle_thresh=0., - rpn_fg_fraction=0.5, - rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3, - use_random=True): - super(RPNTargetAssign, self).__init__() - self.rpn_batch_size_per_im = rpn_batch_size_per_im - self.rpn_straddle_thresh = rpn_straddle_thresh - self.rpn_fg_fraction = rpn_fg_fraction - self.rpn_positive_overlap = rpn_positive_overlap - self.rpn_negative_overlap = rpn_negative_overlap - self.use_random = use_random - - -class GenerateProposals(object): - # __op__ = fluid.layers.generate_proposals - def __init__(self, - pre_nms_top_n=6000, - post_nms_top_n=1000, - nms_thresh=.5, - min_size=.1, - eta=1.): - super(GenerateProposals, self).__init__() - self.pre_nms_top_n = pre_nms_top_n - self.post_nms_top_n = post_nms_top_n - self.nms_thresh = nms_thresh - self.min_size = min_size - self.eta = eta - - -class RPNHead(object): - """ - RPN Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - num_classes (int): number of classes in rpn output - """ - __inject__ = [ - 'anchor_generator', 'rpn_target_assign', 'train_proposal', - 'test_proposal' - ] - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - num_classes=1): - super(RPNHead, self).__init__() - self.anchor_generator = anchor_generator - self.rpn_target_assign = rpn_target_assign - self.train_proposal = train_proposal - self.test_proposal = test_proposal - self.num_classes = num_classes - - def _get_output(self, input): - """ - Get anchor and RPN head output. - - Args: - input(Variable): feature map from backbone with shape of [N, C, H, W] - - Returns: - rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W]. 
- """ - dim_out = input.shape[1] - rpn_conv = fluid.layers.conv2d( - input=input, - num_filters=dim_out, - filter_size=3, - stride=1, - padding=1, - act='relu', - name='conv_rpn', - param_attr=ParamAttr( - name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) - # Generate anchors self.anchor_generator - self.anchor, self.anchor_var = fluid.layers.anchor_generator( - input=rpn_conv, - anchor_sizes=self.anchor_generator.anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance, - stride=self.anchor_generator.stride) - - num_anchor = self.anchor.shape[2] - # Proposal classification scores - self.rpn_cls_score = fluid.layers.conv2d( - rpn_conv, - num_filters=num_anchor * self.num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_cls_score', - param_attr=ParamAttr( - name="rpn_cls_logits_w", initializer=Normal(loc=0., - scale=0.01)), - bias_attr=ParamAttr( - name="rpn_cls_logits_b", - learning_rate=2., - regularizer=L2Decay(0.))) - # Proposal bbox regression deltas - self.rpn_bbox_pred = fluid.layers.conv2d( - rpn_conv, - num_filters=4 * num_anchor, - filter_size=1, - stride=1, - padding=0, - act=None, - name='rpn_bbox_pred', - param_attr=ParamAttr( - name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name="rpn_bbox_pred_b", - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def get_proposals(self, body_feats, im_info, mode='train'): - """ - Get proposals according to the output of backbone. - - Args: - body_feats (dict): The dictionary of feature maps from backbone. - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - body_feat_names(list): A list of names of feature maps from - backbone. - - Returns: - rpn_rois(Variable): Output proposals with shape of (rois_num, 4). - """ - # In RPN Heads, only the last feature map of backbone is used. - # And body_feat_names[-1] represents the last level name of backbone. 
- body_feat = list(body_feats.values())[-1] - rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - - if self.num_classes == 1: - rpn_cls_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_prob') - else: - rpn_cls_score = fluid.layers.transpose( - rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_cls_score = fluid.layers.reshape( - rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_tmp = fluid.layers.softmax( - rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') - rpn_cls_prob_slice = fluid.layers.slice( - rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) - rpn_cls_prob = fluid.layers.reshape( - rpn_cls_prob, shape=(0, 0, 0, -1)) - rpn_cls_prob = fluid.layers.transpose( - rpn_cls_prob, perm=[0, 3, 1, 2]) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - # prop_op - rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals( - scores=rpn_cls_prob, - bbox_deltas=rpn_bbox_pred, - im_info=im_info, - anchors=self.anchor, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois - - def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor, - anchor_var): - rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1]) - rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) - anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) - anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape( - x=rpn_cls_score, shape=(0, -1, self.num_classes)) - rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) - return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var - - def _get_loss_input(self): - for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']: - if not getattr(self, attr, None): - raise ValueError("self.{} should not be None,".format(attr), - "call RPNHead.get_proposals first") - return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, - self.anchor, self.anchor_var) - - def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): - """ - Sample proposals and Calculate rpn loss. - - Args: - im_info(Variable): The information of image with shape [N, 3] with - shape (height, width, scale). - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. - - Returns: - Type: dict - rpn_cls_loss(Variable): RPN classification loss. - rpn_bbox_loss(Variable): RPN bounding box regression loss. 
- - """ - rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - if self.num_classes == 1: - # self.rpn_target_assign - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - fluid.layers.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info, - rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap, - use_random=self.rpn_target_assign.use_random) - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) - else: - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - num_classes=self.num_classes, - im_info=im_info) - labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') - labels_int64.stop_gradient = True - rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( - logits=score_pred, label=labels_int64, numeric_stable_mode=True) - - rpn_cls_loss = fluid.layers.reduce_mean( - rpn_cls_loss, name='loss_rpn_cls') - - loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32') - loc_tgt.stop_gradient = True - rpn_reg_loss = fluid.layers.smooth_l1( - x=loc_pred, - y=loc_tgt, - sigma=3.0, - inside_weight=bbox_weight, - outside_weight=bbox_weight) - rpn_reg_loss = fluid.layers.reduce_sum( - rpn_reg_loss, name='loss_rpn_bbox') - score_shape = fluid.layers.shape(score_tgt) - score_shape = fluid.layers.cast(x=score_shape, dtype='float32') - norm = fluid.layers.reduce_prod(score_shape) - norm.stop_gradient = True - rpn_reg_loss = rpn_reg_loss / norm - return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss} - - -class FPNRPNHead(RPNHead): - """ - RPN Head that supports FPN input - - Args: - anchor_generator (object): `AnchorGenerator` instance - rpn_target_assign (object): `RPNTargetAssign` instance - train_proposal (object): `GenerateProposals` instance for training - test_proposal (object): `GenerateProposals` instance for testing - anchor_start_size (int): size of anchor at the first scale - num_chan (int): number of FPN output channels - min_level (int): lowest level of FPN output - max_level (int): highest level of FPN output - num_classes (int): number of classes in rpn output - """ - - def __init__(self, - anchor_generator, - rpn_target_assign, - train_proposal, - test_proposal, - anchor_start_size=32, - num_chan=256, - min_level=2, - max_level=6, - num_classes=1): - super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, - train_proposal, test_proposal) - self.anchor_start_size = anchor_start_size - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.num_classes = num_classes - - self.fpn_rpn_list = [] - self.anchors_list = [] - self.anchor_var_list = [] - - def _get_output(self, input, feat_lvl): - """ - Get anchor and FPN RPN head output at one level. - - Args: - input(Variable): Body feature from backbone. - feat_lvl(int): Indicate the level of rpn output corresponding - to the level of feature map. 
- - Return: - rpn_cls_score(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors, H, W]. - rpn_bbox_pred(Variable): Output of one level of fpn rpn head with - shape of [N, num_anchors * 4, H, W]. - """ - slvl = str(feat_lvl) - conv_name = 'conv_rpn_fpn' + slvl - cls_name = 'rpn_cls_logits_fpn' + slvl - bbox_name = 'rpn_bbox_pred_fpn' + slvl - conv_share_name = 'conv_rpn_fpn' + str(self.min_level) - cls_share_name = 'rpn_cls_logits_fpn' + str(self.min_level) - bbox_share_name = 'rpn_bbox_pred_fpn' + str(self.min_level) - - num_anchors = len(self.anchor_generator.aspect_ratios) - conv_rpn_fpn = fluid.layers.conv2d( - input=input, - num_filters=self.num_chan, - filter_size=3, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr( - name=conv_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=conv_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - - # self.anchor_generator - self.anchors, self.anchor_var = fluid.layers.anchor_generator( - input=conv_rpn_fpn, - anchor_sizes=(self.anchor_start_size * 2.** - (feat_lvl - self.min_level), ), - stride=(2.**feat_lvl, 2.**feat_lvl), - aspect_ratios=self.anchor_generator.aspect_ratios, - variance=self.anchor_generator.variance) - - cls_num_filters = num_anchors * self.num_classes - self.rpn_cls_score = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=cls_num_filters, - filter_size=1, - act=None, - name=cls_name, - param_attr=ParamAttr( - name=cls_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - self.rpn_bbox_pred = fluid.layers.conv2d( - input=conv_rpn_fpn, - num_filters=num_anchors * 4, - filter_size=1, - act=None, - name=bbox_name, - param_attr=ParamAttr( - name=bbox_share_name + '_w', - initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=bbox_share_name + '_b', - learning_rate=2., - regularizer=L2Decay(0.))) - return self.rpn_cls_score, self.rpn_bbox_pred - - def _get_single_proposals(self, body_feat, im_info, feat_lvl, mode='train'): - """ - Get proposals in one level according to the output of fpn rpn head - - Args: - body_feat(Variable): the feature map from backone. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - feat_lvl(int): Indicate the level of proposals corresponding to - the feature maps. - - Returns: - rpn_rois_fpn(Variable): Output proposals with shape of (rois_num, 4). - rpn_roi_probs_fpn(Variable): Scores of proposals with - shape of (rois_num, 1). 
- """ - - rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output( - body_feat, feat_lvl) - - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - if self.num_classes == 1: - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) - else: - rpn_cls_score_fpn = fluid.layers.transpose( - rpn_cls_score_fpn, perm=[0, 2, 3, 1]) - rpn_cls_score_fpn = fluid.layers.reshape( - rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) - rpn_cls_prob_fpn = fluid.layers.softmax( - rpn_cls_score_fpn, - use_cudnn=False, - name='rpn_cls_prob_fpn' + str(feat_lvl)) - rpn_cls_prob_fpn = fluid.layers.slice( - rpn_cls_prob_fpn, axes=[4], starts=[1], ends=[self.num_classes]) - rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) - rpn_cls_prob_fpn = fluid.layers.reshape( - rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) - rpn_cls_prob_fpn = fluid.layers.transpose( - rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) - # prop_op - rpn_rois_fpn, rpn_roi_prob_fpn = fluid.layers.generate_proposals( - scores=rpn_cls_prob_fpn, - bbox_deltas=rpn_bbox_pred_fpn, - im_info=im_info, - anchors=self.anchors, - variances=self.anchor_var, - pre_nms_top_n=prop_op.pre_nms_top_n, - post_nms_top_n=prop_op.post_nms_top_n, - nms_thresh=prop_op.nms_thresh, - min_size=prop_op.min_size, - eta=prop_op.eta) - return rpn_rois_fpn, rpn_roi_prob_fpn - - def get_proposals(self, fpn_feats, im_info, mode='train'): - """ - Get proposals in multiple levels according to the output of fpn - rpn head - - Args: - fpn_feats(dict): A dictionary represents the output feature map - of FPN with their name. - im_info(Variable): The information of image with shape [N, 3] with - format (height, width, scale). - - Return: - rois_list(Variable): Output proposals in shape of [rois_num, 4] - """ - rois_list = [] - roi_probs_list = [] - fpn_feat_names = list(fpn_feats.keys()) - for lvl in range(self.min_level, self.max_level + 1): - fpn_feat_name = fpn_feat_names[self.max_level - lvl] - fpn_feat = fpn_feats[fpn_feat_name] - rois_fpn, roi_probs_fpn = self._get_single_proposals( - fpn_feat, im_info, lvl, mode) - self.fpn_rpn_list.append((self.rpn_cls_score, self.rpn_bbox_pred)) - rois_list.append(rois_fpn) - roi_probs_list.append(roi_probs_fpn) - self.anchors_list.append(self.anchors) - self.anchor_var_list.append(self.anchor_var) - prop_op = self.train_proposal if mode == 'train' else self.test_proposal - post_nms_top_n = prop_op.post_nms_top_n - rois_collect = fluid.layers.collect_fpn_proposals( - rois_list, - roi_probs_list, - self.min_level, - self.max_level, - post_nms_top_n, - name='collect') - return rois_collect - - def _get_loss_input(self): - rpn_clses = [] - rpn_bboxes = [] - anchors = [] - anchor_vars = [] - for i in range(len(self.fpn_rpn_list)): - single_input = self._transform_input( - self.fpn_rpn_list[i][0], self.fpn_rpn_list[i][1], - self.anchors_list[i], self.anchor_var_list[i]) - rpn_clses.append(single_input[0]) - rpn_bboxes.append(single_input[1]) - anchors.append(single_input[2]) - anchor_vars.append(single_input[3]) - - rpn_cls = fluid.layers.concat(rpn_clses, axis=1) - rpn_bbox = fluid.layers.concat(rpn_bboxes, axis=1) - anchors = fluid.layers.concat(anchors) - anchor_var = fluid.layers.concat(anchor_vars) - return rpn_cls, rpn_bbox, anchors, anchor_var diff --git a/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py new file mode 100644 index 000000000..0a775a4f5 --- /dev/null +++ 
b/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 51427477fff7697a6dec1547158f9fb190515079 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 14:39:56 +0800 Subject: [PATCH 057/117] update ssd_vgg16_300_coco2017 (#1949) * update ssd_vgg16_300_model * update unittest * update unittest * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../ssd_vgg16_300_coco2017/README.md | 231 ++++++++++-------- .../ssd_vgg16_300_coco2017/README_en.md | 169 +++++++++++++ .../ssd_vgg16_300_coco2017/data_feed.py | 2 - .../ssd_vgg16_300_coco2017/module.py | 193 
+++++---------- .../ssd_vgg16_300_coco2017/processor.py | 8 +- .../ssd_vgg16_300_coco2017/test.py | 108 ++++++++ .../ssd_vgg16_300_coco2017/vgg.py | 184 -------------- 7 files changed, 479 insertions(+), 416 deletions(-) create mode 100644 modules/image/object_detection/ssd_vgg16_300_coco2017/README_en.md create mode 100644 modules/image/object_detection/ssd_vgg16_300_coco2017/test.py delete mode 100644 modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md b/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md index 85510f3e3..567e575a3 100644 --- a/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md @@ -1,138 +1,169 @@ -## 命令行预测 +# ssd_vgg16_300_coco2017 -```shell -$ hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE" -``` +|模型名称|ssd_vgg16_300_coco2017| +| :--- | :---: | +|类别|图像 - 目标检测| +|网络|SSD| +|数据集|COCO2017| +|是否支持Fine-tuning|否| +|模型大小|139MB| +|最新更新日期|2021-03-15| +|数据指标|-| -## API -```python -def context(trainable=True, - pretrained=True, - get_prediction=False) -``` +## 一、模型基本信息 -提取特征,用于迁移学习。 +- ### 应用效果展示 + - 样例结果示例: +

-**参数** +- ### 模型介绍 -* trainable(bool): 参数是否可训练; -* pretrained (bool): 是否加载预训练模型; -* get\_prediction (bool): 是否执行预测。 + - Single Shot MultiBox Detector (SSD) 是一种单阶段的目标检测器。与两阶段的检测方法不同,单阶段目标检测并不进行区域推荐,而是直接从特征图回归出目标的边界框和分类概率。SSD 运用了这种单阶段检测的思想,并且对其进行改进:在不同尺度的特征图上检测对应尺度的目标。该PaddleHub Module的基网络为VGG16模型,在Pascal数据集上预训练得到,目前仅支持预测。 -**返回** -* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为: - * image (Variable): 图像变量 - * im\_size (Variable): 图片的尺寸 -* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。 -* context\_prog (Program): 用于迁移学习的 Program. +## 二、安装 -```python -def object_detection(paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True) -``` +- ### 1、环境依赖 -预测API,检测输入图片中的所有目标的位置。 + - paddlepaddle >= 1.6.2 -**参数** + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -* paths (list\[str\]): 图片的路径; -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU; -* score\_thresh (float): 识别置信度的阈值; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径,默认设为 detection\_result; +- ### 2、安装 -**返回** + - ```shell + $ hub install ssd_vgg16_300_coco2017 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: - * data (list): 检测结果,list的每一个元素为 dict,各字段为: - * confidence (float): 识别的置信度; - * label (str): 标签; - * left (int): 边界框的左上角x坐标; - * top (int): 边界框的左上角y坐标; - * right (int): 边界框的右下角x坐标; - * bottom (int): 边界框的右下角y坐标; - * save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。 +## 三、模型API预测 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` +- ### 1、命令行预测 -将模型保存到指定路径。 + - ```shell + $ hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现目标检测模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、预测代码示例 -**参数** + - ```python + import paddlehub as hub + import cv2 -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + object_detector = hub.Module(name="ssd_vgg16_300_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` -## 代码示例 +- ### 3、API -```python -import paddlehub as hub -import cv2 + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` -object_detector = hub.Module(name="ssd_vgg16_300_coco2017") -result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) -``` + - 预测API,检测输入图片中的所有目标的位置。 -## 服务部署 + - **参数** -PaddleHub Serving可以部署一个目标检测的在线服务。 + - paths (list\[str\]): 图片的路径;
+ - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;
+ - output\_dir (str): 图片的保存路径,默认设为 detection\_result;
+ - score\_thresh (float): 识别置信度的阈值;
+ - visualization (bool): 是否将识别结果保存为图片文件。 -## 第一步:启动PaddleHub Serving + **NOTE:** paths和images两个参数选择其一进行提供数据 -运行启动命令: -```shell -$ hub serving start -m ssd_vgg16_300_coco2017 -``` + - **返回** -这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list): 检测结果,list的每一个元素为 dict,各字段为: + - confidence (float): 识别的置信度 + - label (str): 标签 + - left (int): 边界框的左上角x坐标 + - top (int): 边界框的左上角y坐标 + - right (int): 边界框的右下角x坐标 + - bottom (int): 边界框的右下角y坐标 + - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - ```python + def save_inference_model(dirname) + ``` + - 将模型保存到指定路径。 -## 第二步:发送预测请求 + - **参数** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - dirname: 模型保存路径
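A minimal export sketch (not part of the original patch; it mirrors the usage exercised by the unit tests added elsewhere in this patch series, and the `./inference/model` prefix is only an illustrative choice):

```python
import paddlehub as hub

# Load the module and export its inference model to a chosen path prefix.
# Any writable prefix works; "./inference/model" matches the added unit tests.
module = hub.Module(name="ssd_vgg16_300_coco2017")
module.save_inference_model('./inference/model')

# The exported static-graph files are then expected at
# ./inference/model.pdmodel and ./inference/model.pdiparams
```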
-```python -import requests -import json -import cv2 -import base64 +## 四、服务部署 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') +- PaddleHub Serving可以部署一个目标检测的在线服务。 +- ### 第一步:启动PaddleHub Serving -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + - 运行启动命令: + - ```shell + $ hub serving start -m ssd_vgg16_300_coco2017 + ``` -# 打印预测结果 -print(r.json()["results"]) -``` + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 -### 依赖 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -paddlepaddle >= 1.6.2 +- ### 第二步:发送预测请求 -paddlehub >= 1.6.0 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.2 + + 修复numpy数据读取问题 + +* 1.1.0 + + 移除 fluid api + + - ```shell + $ hub install ssd_vgg16_300_coco2017==1.1.0 + ``` diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/README_en.md b/modules/image/object_detection/ssd_vgg16_300_coco2017/README_en.md new file mode 100644 index 000000000..0d53ce2f7 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/README_en.md @@ -0,0 +1,169 @@ +# ssd_vgg16_300_coco2017 + +|Module Name|ssd_vgg16_300_coco2017| +| :--- | :---: | +|Category|object detection| +|Network|SSD| +|Dataset|COCO2017| +|Fine-tuning supported or not|No| +|Module Size|139MB| +|Latest update date|2021-03-15| +|Data indicators|-| + + +## I.Basic Information + +- ### Application Effect Display + - Sample results: +


+ +- ### Module Introduction + + - Single Shot MultiBox Detector (SSD) is a one-stage detector. Different from two-stage detector, SSD frames object detection as a re- gression problem to spatially separated bounding boxes and associated class probabilities. This module is based on VGG16, trained on COCO2017 dataset, and can be used for object detection. + + + +## II.Installation + +- ### 1、Environmental Dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install ssd_vgg16_300_coco2017 + ``` + - In case of any problems during installation, please refer to: [Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + +## III.Module API Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command Line Instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + object_detector = hub.Module(name="ssd_vgg16_300_coco2017") + result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def object_detection(paths=None, + images=None, + batch_size=1, + use_gpu=False, + output_dir='detection_result', + score_thresh=0.5, + visualization=True) + ``` + + - Detection API, detect positions of all objects in image + + - **Parameters** + + - paths (list[str]): image path; + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format [H, W, C], BGR; + - batch_size (int): the size of batch; + - use_gpu (bool): use GPU or not; **set the CUDA_VISIBLE_DEVICES environment variable first if you are using GPU** + - output_dir (str): save path of images; + - score\_thresh (float): confidence threshold;
+ - visualization (bool): Whether to save the results as picture files; + + **NOTE:** choose one parameter to provide data from paths and images + + - **Return** + + - res (list\[dict\]): results + - data (list): detection results, each element in the list is dict + - confidence (float): the confidence of the result + - label (str): label + - left (int): the upper left corner x coordinate of the detection box + - top (int): the upper left corner y coordinate of the detection box + - right (int): the lower right corner x coordinate of the detection box + - bottom (int): the lower right corner y coordinate of the detection box + - save\_path (str, optional): output path for saving results + + - ```python + def save_inference_model(dirname) + ``` + - Save model to specific path + + - **Parameters** + + - dirname: model save path + + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of object detection. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m ssd_vgg16_300_coco2017 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction results + print(r.json()["results"]) + ``` + + +## V.Release Note + +* 1.0.0 + + First release + +* 1.0.2 + + Fix the problem of reading numpy + +* 1.1.0 + + Remove fluid api + + - ```shell + $ hub install ssd_vgg16_300_coco2017==1.1.0 + ``` diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py index 9fad7c95e..3d3382bb2 100644 --- a/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py @@ -5,12 +5,10 @@ import os import random -from collections import OrderedDict import cv2 import numpy as np from PIL import Image -from paddle import fluid __all__ = ['reader'] diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py index e0083b95f..beefaf6ab 100644 --- a/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py @@ -7,39 +7,43 @@ from functools import partial import yaml +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from ssd_vgg16_300_coco2017.vgg import VGG -from ssd_vgg16_300_coco2017.processor 
import load_label_info, postprocess, base64_to_cv2 -from ssd_vgg16_300_coco2017.data_feed import reader +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="ssd_vgg16_300_coco2017", - version="1.0.1", + version="1.1.0", type="cv/object_detection", summary="SSD with backbone VGG16, trained with dataset COCO.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class SSDVGG16(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "ssd_vgg16_300_model") - self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) +class SSDVGG16: + def __init__(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "ssd_vgg16_300_model", "model") + self.label_names = load_label_info( + os.path.join(self.directory, "label_file.txt")) self.model_config = None self._set_config() def _set_config(self): - # predictor config setting. - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + """ + predictor config setting. + """ + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -48,10 +52,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) # model config setting. if not self.model_config: @@ -61,73 +65,6 @@ def _set_config(self): self.multi_box_head_config = self.model_config['MultiBoxHead'] self.output_decoder_config = self.model_config['SSDOutputDecoder'] - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data(name='image', shape=[3, 300, 300], dtype='float32') - # backbone - backbone = VGG(depth=16, with_extra_blocks=True, normalizations=[20., -1, -1, -1, -1, -1]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # names of inputs - inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} - # names of outputs - if get_prediction: - locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, image=image, num_classes=81, **self.multi_box_head_config) - pred = fluid.layers.detection_output( - loc=locs, scores=confs, prior_box=box, prior_box_var=box_var, **self.output_decoder_config) - outputs = {'bbox_out': [var_prefix + pred.name]} - else: - outputs = {'body_features': [var_prefix + var.name for var in body_feats]} - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} - outputs = { - out_key: [context_prog.global_block().vars[varname] for varname in out_value] - for out_key, out_value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - def object_detection(self, paths=None, images=None, @@ -160,47 +97,31 @@ def object_detection(self, """ paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = 
"__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ @@ -220,9 +141,12 @@ def run_cmd(self, argvs): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", description="Run configuration for controlling module behavior, not required.") + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -240,17 +164,34 @@ def add_module_config_arg(self): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") self.arg_config_group.add_argument( - '--output_dir', type=str, default='detection_result', help="The directory to save output images.") + '--output_dir', + type=str, + default='detection_result', + help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + '--visualization', + type=ast.literal_eval, + default=False, + help="whether to save output as images.") def add_module_input_arg(self): """ Add the command input options. """ - self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--score_thresh', type=ast.literal_eval, default=0.5, help="threshold for object detecion.") + '--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + self.arg_input_group.add_argument( + '--score_thresh', + type=ast.literal_eval, + default=0.5, + help="threshold for object detecion.") diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py index ff4eb9fe5..9bf964540 100644 --- a/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py @@ -85,7 +85,7 @@ def load_label_info(file_path): def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. 
@@ -108,9 +108,9 @@ def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() + if handle_id < len(paths): unhandled_paths = paths[handle_id:] unhandled_paths_num = len(unhandled_paths) diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/test.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/test.py new file mode 100644 index 000000000..922f3b601 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_300_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="ssd_vgg16_300_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + 
self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() diff --git a/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py b/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py deleted file mode 100644 index d950c6b55..000000000 --- a/modules/image/object_detection/ssd_vgg16_300_coco2017/vgg.py +++ /dev/null @@ -1,184 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['VGG'] - - -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1. - with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], [128, 256, 0, 1, 3], - [128, 256, 0, 1, 3]], - class_dim=1000): - assert depth in [16, 19], "depth {} not in [16, 19]" - self.depth = depth - self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - self.class_dim = class_dim - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - res_layer = [] - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block(conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if self.with_extra_blocks: - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - else: - conv = self._pooling_block(conv, 2, 2) - else: - conv = self._pooling_block(conv, 2, 2) - if not self.with_extra_blocks: - fc_dim = 4096 - fc_name = ["fc6", "fc7", "fc8"] - fc1 = fluid.layers.fc( - input=conv, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr(name=fc_name[0] + "_offset")) - fc2 = fluid.layers.fc( - input=fc1, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr(name=fc_name[1] + "_offset")) - out = fluid.layers.fc( - input=fc2, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr(name=fc_name[2] + "_offset")) - out = fluid.layers.softmax(out) - res_layer.append(out) - return [out] - else: - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - 
assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block(conv, v[0], v[1], v[2], v[3], v[4], name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, input, num_filters1, num_filters2, padding_size, stride_size, filter_size, name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, num_filters=int(num_filters1), filter_size=1, stride=1, act='relu', padding=0, name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + "_biases") if self.with_extra_blocks else False, - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, conv, pool_size, pool_stride, pool_padding=0, ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize(input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, shape=shape, dtype=input.dtype, default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, y=scale, axis=-1 if channel_shared else 1, name="conv4_3_norm_scale") - return out From 0ed21a48f1cd9330209a8b436b19c87fdc4f8fa8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 14:50:47 +0800 Subject: [PATCH 058/117] update ssd_vgg16_512_coco2017 (#1950) * update ssd_vgg16_512_model * update unittest * update unittest * update gpu config * update * add clean func * update save inference model Co-authored-by: wuzewu Co-authored-by: chenjian --- .../ssd_vgg16_512_coco2017/README.md | 16 +- .../ssd_vgg16_512_coco2017/README_en.md | 16 +- .../ssd_vgg16_512_coco2017/data_feed.py | 2 - .../ssd_vgg16_512_coco2017/module.py | 191 +++------------ .../ssd_vgg16_512_coco2017/processor.py | 7 +- .../ssd_vgg16_512_coco2017/test.py | 108 +++++++++ .../ssd_vgg16_512_coco2017/vgg.py | 224 ------------------ 7 files changed, 161 insertions(+), 403 deletions(-) create mode 100644 modules/image/object_detection/ssd_vgg16_512_coco2017/test.py delete mode 100644 modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md b/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md index 1f4618982..3ca5f3306 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md +++ 
b/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md @@ -100,19 +100,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
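> Editor's note: the single-argument `save_inference_model(dirname)` above writes a `.pdmodel`/`.pdiparams` pair, which is the same format the rewritten `module.py` loads through `paddle.inference`. Below is a minimal sketch of exporting and reloading the model; the `./inference/model` prefix is illustrative, borrowed from the new unit tests in this patch.

```python
import paddlehub as hub
from paddle.inference import Config, create_predictor

# Export the pretrained module as a static inference model.
module = hub.Module(name="ssd_vgg16_512_coco2017")
module.save_inference_model('./inference/model')  # writes model.pdmodel / model.pdiparams

# Reload the exported files the same way the updated module.py builds its predictors.
config = Config('./inference/model.pdmodel', './inference/model.pdiparams')
config.disable_gpu()
predictor = create_predictor(config)
print(predictor.get_input_names())
```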
## 四、服务部署 @@ -166,6 +160,10 @@ 修复numpy数据读取问题 +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install ssd_vgg16_512_coco2017==1.0.2 + $ hub install ssd_vgg16_512_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md b/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md index 0d862abcf..38da1460c 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/README_en.md @@ -100,19 +100,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -166,6 +160,10 @@ Fix the problem of reading numpy +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install ssd_vgg16_512_coco2017==1.0.2 + $ hub install ssd_vgg16_512_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py index c1994f116..a235f9b0c 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py @@ -5,12 +5,10 @@ import os import random -from collections import OrderedDict import cv2 import numpy as np from PIL import Image -from paddle import fluid __all__ = ['reader'] diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py index 9f8e3eb64..a4e8da16e 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py @@ -7,41 +7,43 @@ from functools import partial import yaml +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from ssd_vgg16_512_coco2017.vgg import VGG -from ssd_vgg16_512_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from ssd_vgg16_512_coco2017.data_feed import reader +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="ssd_vgg16_512_coco2017", - version="1.0.2", + version="1.1.0", type="cv/object_detection", summary="SSD with backbone VGG16, trained with dataset COCO.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class SSDVGG16_512(hub.Module): - def _initialize(self): +class SSDVGG16_512: + def __init__(self): self.default_pretrained_model_path = os.path.join( - self.directory, "ssd_vgg16_512_model") + self.directory, "ssd_vgg16_512_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self.model_config = None self._set_config() def _set_config(self): - # predictor config setting. - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + """ + predictor config setting. 
+ """ + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -50,10 +52,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) # model config setting. if not self.model_config: @@ -63,107 +65,6 @@ def _set_config(self): self.multi_box_head_config = self.model_config['MultiBoxHead'] self.output_decoder_config = self.model_config['SSDOutputDecoder'] - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 512, 512], dtype='float32') - # backbone - backbone = VGG( - depth=16, - with_extra_blocks=True, - normalizations=[20., -1, -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, - 3], [128, 256, 1, 2, 3], - [128, 256, 1, 2, - 3], [128, 256, 1, 2, 3], - [128, 256, 1, 1, 4]]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # names of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # names of outputs - if get_prediction: - locs, confs, box, box_var = fluid.layers.multi_box_head( - inputs=body_feats, - image=image, - num_classes=81, - **self.multi_box_head_config) - pred = fluid.layers.detection_output( - loc=locs, - scores=confs, - prior_box=box, - prior_box_var=box_var, - **self.output_decoder_config) - outputs = {'bbox_out': [var_prefix + pred.name]} - else: - outputs = { - 'body_features': - [var_prefix + var.name for var in body_feats] - } - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - outputs = { - out_key: [ - context_prog.global_block().vars[varname] - for varname in out_value - ] - for out_key, out_value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - 
self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog - def object_detection(self, paths=None, images=None, @@ -205,51 +106,31 @@ def object_detection(self, paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy()) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py index 82b2335f6..5079f50cd 100644 --- a/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py @@ -104,7 +104,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): the path of images. @@ -127,9 +127,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. 
""" - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/test.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/test.py new file mode 100644 index 000000000..f6c72cfb6 --- /dev/null +++ b/modules/image/object_detection/ssd_vgg16_512_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="ssd_vgg16_512_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() diff --git a/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py b/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py deleted file mode 100644 index dc760f328..000000000 --- 
a/modules/image/object_detection/ssd_vgg16_512_coco2017/vgg.py +++ /dev/null @@ -1,224 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['VGG'] - - -class VGG(object): - """ - VGG, see https://arxiv.org/abs/1409.1556 - - Args: - depth (int): the VGG net depth (16 or 19) - normalizations (list): params list of init scale in l2 norm, skip init - scale if param is -1. - with_extra_blocks (bool): whether or not extra blocks should be added - extra_block_filters (list): in each extra block, params: - [in_channel, out_channel, padding_size, stride_size, filter_size] - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=16, - with_extra_blocks=False, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]], - class_dim=1000): - assert depth in [16, 19], "depth {} not in [16, 19]" - self.depth = depth - self.depth_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]} - self.with_extra_blocks = with_extra_blocks - self.normalizations = normalizations - self.extra_block_filters = extra_block_filters - self.class_dim = class_dim - - def __call__(self, input): - layers = [] - layers += self._vgg_block(input) - - if not self.with_extra_blocks: - return layers[-1] - - layers += self._add_extras_block(layers[-1]) - norm_cfg = self.normalizations - for k, v in enumerate(layers): - if not norm_cfg[k] == -1: - layers[k] = self._l2_norm_scale(v, init_scale=norm_cfg[k]) - - return layers - - def _vgg_block(self, input): - nums = self.depth_cfg[self.depth] - vgg_base = [64, 128, 256, 512, 512] - conv = input - res_layer = [] - layers = [] - for k, v in enumerate(vgg_base): - conv = self._conv_block( - conv, v, nums[k], name="conv{}_".format(k + 1)) - layers.append(conv) - if self.with_extra_blocks: - if k == 4: - conv = self._pooling_block(conv, 3, 1, pool_padding=1) - else: - conv = self._pooling_block(conv, 2, 2) - else: - conv = self._pooling_block(conv, 2, 2) - if not self.with_extra_blocks: - fc_dim = 4096 - fc_name = ["fc6", "fc7", "fc8"] - fc1 = fluid.layers.fc( - input=conv, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[0] + "_offset")) - fc2 = fluid.layers.fc( - input=fc1, - size=fc_dim, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[1] + "_offset")) - out = fluid.layers.fc( - input=fc2, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=fc_name[2] + "_offset")) - out = fluid.layers.softmax(out) - res_layer.append(out) - return [out] - else: - fc6 = self._conv_layer(conv, 1024, 3, 1, 6, dilation=6, name="fc6") - fc7 = self._conv_layer(fc6, 1024, 1, 1, 0, name="fc7") - return [layers[3], fc7] - - def _add_extras_block(self, input): - cfg = self.extra_block_filters - conv = input - layers = [] - for k, v in enumerate(cfg): - assert len(v) == 5, "extra_block_filters size not fix" - conv = self._extra_block( - conv, - v[0], - v[1], - v[2], - v[3], - v[4], - name="conv{}_".format(6 + k)) - layers.append(conv) - - return layers - - def _conv_block(self, input, num_filter, groups, name=None): - conv = input - for 
i in range(groups): - conv = self._conv_layer( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=name + str(i + 1)) - return conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - padding_size, - stride_size, - filter_size, - name=None): - # 1x1 conv - conv_1 = self._conv_layer( - input=input, - num_filters=int(num_filters1), - filter_size=1, - stride=1, - act='relu', - padding=0, - name=name + "1") - - # 3x3 conv - conv_2 = self._conv_layer( - input=conv_1, - num_filters=int(num_filters2), - filter_size=filter_size, - stride=stride_size, - act='relu', - padding=padding_size, - name=name + "2") - return conv_2 - - def _conv_layer(self, - input, - num_filters, - filter_size, - stride, - padding, - dilation=1, - act='relu', - use_cudnn=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr( - name=name + "_biases") if self.with_extra_blocks else False, - name=name + '.conv2d.output.1') - return conv - - def _pooling_block(self, - conv, - pool_size, - pool_stride, - pool_padding=0, - ceil_mode=True): - pool = fluid.layers.pool2d( - input=conv, - pool_size=pool_size, - pool_type='max', - pool_stride=pool_stride, - pool_padding=pool_padding, - ceil_mode=ceil_mode) - return pool - - def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False): - from paddle.fluid.layer_helper import LayerHelper - from paddle.fluid.initializer import Constant - helper = LayerHelper("Scale") - l2_norm = fluid.layers.l2_normalize( - input, axis=1) # l2 norm along channel - shape = [1] if channel_shared else [input.shape[1]] - scale = helper.create_parameter( - attr=helper.param_attr, - shape=shape, - dtype=input.dtype, - default_initializer=Constant(init_scale)) - out = fluid.layers.elementwise_mul( - x=l2_norm, - y=scale, - axis=-1 if channel_shared else 1, - name="conv4_3_norm_scale") - return out From 3d3ca6579584b0aa98713857d3e95575ed455702 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:01:47 +0800 Subject: [PATCH 059/117] update yolov3_darknet53_coco2017 (#1951) * update ssd_vgg16_512_coco2017 * update unittest * update unittest * update version * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../yolov3_darknet53_coco2017/README.md | 15 +- .../yolov3_darknet53_coco2017/README_en.md | 15 +- .../yolov3_darknet53_coco2017/darknet.py | 121 --------- .../yolov3_darknet53_coco2017/module.py | 164 +++---------- .../yolov3_darknet53_coco2017/processor.py | 7 +- .../yolov3_darknet53_coco2017/test.py | 108 ++++++++ .../yolov3_darknet53_coco2017/yolo_head.py | 231 ------------------ 7 files changed, 159 insertions(+), 502 deletions(-) delete mode 100644 modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py create mode 100644 modules/image/object_detection/yolov3_darknet53_coco2017/test.py delete mode 100644 modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/README.md b/modules/image/object_detection/yolov3_darknet53_coco2017/README.md index 154822517..200e9952a 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/README.md +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/README.md @@ -100,19 
+100,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
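> Editor's note: the parameters documented above can be combined for batched, non-visualized prediction over many files. A minimal sketch follows; the image folder, file extensions, and batch size are purely illustrative.

```python
import os
import paddlehub as hub

detector = hub.Module(name="yolov3_darknet53_coco2017")

# Collect images from a hypothetical local folder.
image_dir = '/PATH/TO/IMAGES'
paths = sorted(os.path.join(image_dir, name)
               for name in os.listdir(image_dir)
               if name.lower().endswith(('.jpg', '.png')))

# Results come back in the same order as the input paths.
results = detector.object_detection(paths=paths,
                                     batch_size=4,
                                     score_thresh=0.5,
                                     visualization=False)
for path, res in zip(paths, results):
    labels = [(d['label'], round(d['confidence'], 2)) for d in res['data']]
    print(path, labels)
```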
## 四、服务部署 @@ -165,6 +159,9 @@ * 1.1.1 修复numpy数据读取问题 +* 1.2.0 + 移除 fluid api + - ```shell - $ hub install yolov3_darknet53_coco2017==1.1.1 + $ hub install yolov3_darknet53_coco2017==1.2.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md b/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md index b6757ff38..a50f62780 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/README_en.md @@ -99,19 +99,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -164,6 +158,9 @@ * 1.1.1 Fix the problem of reading numpy +* 1.2.0 + Remove fluid api + - ```shell - $ hub install yolov3_darknet53_coco2017==1.1.1 + $ hub install yolov3_darknet53_coco2017==1.2.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py b/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py deleted file mode 100644 index fe925fb4c..000000000 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py +++ /dev/null @@ -1,121 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import math - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['DarkNet'] - - -class DarkNet(object): - """DarkNet, see https://pjreddie.com/darknet/yolo/ - - Args: - depth (int): network depth, currently only darknet 53 is supported - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - get_prediction (bool): whether to get prediction - class_dim (int): number of class while classification - """ - - def __init__(self, - depth=53, - norm_type='sync_bn', - norm_decay=0., - weight_prefix_name='', - get_prediction=False, - class_dim=1000): - assert depth in [53], "unsupported depth value" - self.depth = depth - self.norm_type = norm_type - self.norm_decay = norm_decay - self.depth_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} - self.prefix_name = weight_prefix_name - self.class_dim = class_dim - self.get_prediction = get_prediction - - def _conv_norm(self, input, ch_out, filter_size, stride, padding, act='leaky', name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.scale') - bn_bias_attr = ParamAttr(regularizer=L2Decay(float(self.norm_decay)), name=bn_name + '.offset') - - out = fluid.layers.batch_norm( - input=conv, - act=None, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - # leaky relu here has `alpha` as 0.1, can not be set by - # `act` param in fluid.layers.batch_norm 
above. - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - - return out - - def _downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): - return self._conv_norm(input, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, name=name) - - def basicblock(self, input, ch_out, name=None): - conv1 = self._conv_norm(input, ch_out=ch_out, filter_size=1, stride=1, padding=0, name=name + ".0") - conv2 = self._conv_norm(conv1, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, name=name + ".1") - out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) - return out - - def layer_warp(self, block_func, input, ch_out, count, name=None): - out = block_func(input, ch_out=ch_out, name='{}.0'.format(name)) - for j in six.moves.xrange(1, count): - out = block_func(out, ch_out=ch_out, name='{}.{}'.format(name, j)) - return out - - def __call__(self, input): - """ - Get the backbone of DarkNet, that is output for the 5 stages. - """ - stages, block_func = self.depth_cfg[self.depth] - stages = stages[0:5] - conv = self._conv_norm( - input=input, ch_out=32, filter_size=3, stride=1, padding=1, name=self.prefix_name + "yolo_input") - downsample_ = self._downsample( - input=conv, ch_out=conv.shape[1] * 2, name=self.prefix_name + "yolo_input.downsample") - blocks = [] - for i, stage in enumerate(stages): - block = self.layer_warp( - block_func=block_func, - input=downsample_, - ch_out=32 * 2**i, - count=stage, - name=self.prefix_name + "stage.{}".format(i)) - blocks.append(block) - if i < len(stages) - 1: # do not downsaple in the last stage - downsample_ = self._downsample( - input=block, ch_out=block.shape[1] * 2, name=self.prefix_name + "stage.{}.downsample".format(i)) - if self.get_prediction: - pool = fluid.layers.pool2d(input=block, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) - out = fluid.layers.softmax(out) - return out - else: - return blocks diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/module.py b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py index 7886ef4e6..4b219bc5f 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/module.py +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/module.py @@ -6,29 +6,27 @@ import os from functools import partial +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from yolov3_darknet53_coco2017.darknet import DarkNet -from yolov3_darknet53_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_darknet53_coco2017.data_feed import reader -from yolov3_darknet53_coco2017.yolo_head import MultiClassNMS, YOLOv3Head +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="yolov3_darknet53_coco2017", - version="1.1.1", + version="1.2.0", type="CV/object_detection", summary="Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained with dataset coco2017.", 
author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Coco2017(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_model") +class YOLOv3DarkNet53Coco2017: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_model", "model") self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -36,11 +34,13 @@ def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -49,88 +49,14 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = DarkNet(norm_type='bn', norm_decay=0., depth=53) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': [var_prefix + var.name for var in head_features], - 'body_features': [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} - # outputs - outputs = { - key: [context_prog.global_block().vars[varname] for varname in value] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, images=None, - data=None, batch_size=1, use_gpu=False, output_dir='detection_result', @@ -168,52 +94,34 @@ def object_detection(self, ) paths = paths if paths else list() - if data and 'image' in data: - paths += data['image'] - data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run([image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run([image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = 
predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py index 64049e42b..b3770fc5a 100644 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py @@ -88,7 +88,7 @@ def load_label_info(file_path): def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. @@ -113,9 +113,8 @@ def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/test.py b/modules/image/object_detection/yolov3_darknet53_coco2017/test.py new file mode 100644 index 000000000..af430ee3d --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
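+ # Cache the downloaded sample image to disk; the detection tests below read it back from 'tests/test.jpg'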
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_darknet53_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py deleted file mode 100644 index cfe796c2e..000000000 --- a/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py +++ /dev/null @@ -1,231 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 
network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], - [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, input, ch_out, filter_size, stride, padding, act='leaky', is_test=True, name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr(regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, channel, filter_size=1, stride=1, padding=0, is_test=is_test, name='{}.2'.format(name)) - tip = self._conv_bn( - route, channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test, name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest(input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, channel=512 // (2**i), is_test=(not is_train), name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), name=self.prefix_name + "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred From 199fe1f8d2b309db7bccc4125e944610af833735 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:02:56 +0800 Subject: [PATCH 060/117] update humanseg_mobile (#2001) * update humanseg_mobile * add clean func * update save inference model --- .../humanseg_mobile/README.md | 20 ++- .../humanseg_mobile/README_en.md | 24 +-- .../humanseg_mobile/module.py | 114 +++++++------- .../humanseg_mobile/test.py | 144 ++++++++++++++++++ 4 files changed, 232 insertions(+), 70 deletions(-) create mode 100644 modules/image/semantic_segmentation/humanseg_mobile/test.py diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README.md b/modules/image/semantic_segmentation/humanseg_mobile/README.md index 188234ed2..3817be776 100644 --- a/modules/image/semantic_segmentation/humanseg_mobile/README.md +++ b/modules/image/semantic_segmentation/humanseg_mobile/README.md @@ -174,19 +174,13 @@ ```python - def save_inference_model(dirname='humanseg_mobile_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 ## 四、服务部署 @@ -244,11 +238,21 @@ * 1.0.0 初始发布 + * 1.1.0 新增视频人像分割接口 新增视频流人像分割接口 + * 1.1.1 修复cudnn为8.0.4显存泄露问题 + +* 1.2.0 + + 移除 Fluid API + + ```shell + $ hub install humanseg_mobile == 1.2.0 + ``` diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README_en.md b/modules/image/semantic_segmentation/humanseg_mobile/README_en.md index 7af902ced..0c5e849e5 100644 --- a/modules/image/semantic_segmentation/humanseg_mobile/README_en.md +++ b/modules/image/semantic_segmentation/humanseg_mobile/README_en.md @@ -172,10 +172,7 @@ ```python - def save_inference_model(dirname='humanseg_mobile_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` @@ -183,10 +180,7 @@ - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. 
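The simplified signature above takes a single path prefix rather than a directory plus file names. A minimal usage sketch, based on the unit test added later in this patch (the `./inference/model` prefix is only illustrative):

```python
import paddlehub as hub

module = hub.Module(name="humanseg_mobile")
# dirname is a file prefix, not a directory: the call writes
# ./inference/model.pdmodel (program) and ./inference/model.pdiparams (weights)
module.save_inference_model('./inference/model')
```

The exported pair can then be loaded with `paddle.inference.Config(prefix + '.pdmodel', prefix + '.pdiparams')`, which is how `module.py` in this commit builds its own predictors.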
@@ -244,13 +238,23 @@ - 1.0.0 - First release + First release - 1.1.0 Added video portrait split interface Added video stream portrait segmentation interface + * 1.1.1 - Fix the video memory leakage problem of on cudnn 8.0.4 + Fix the video memory leakage problem of on cudnn 8.0.4 + +* 1.2.0 + + Remove Fluid API + + ```shell + $ hub install humanseg_mobile == 1.2.0 + ``` + diff --git a/modules/image/semantic_segmentation/humanseg_mobile/module.py b/modules/image/semantic_segmentation/humanseg_mobile/module.py index f7ac67966..413386a45 100644 --- a/modules/image/semantic_segmentation/humanseg_mobile/module.py +++ b/modules/image/semantic_segmentation/humanseg_mobile/module.py @@ -19,14 +19,15 @@ import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from humanseg_mobile.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir -from humanseg_mobile.data_feed import reader, preprocess_v -from humanseg_mobile.optimal import postprocess_v, threshold_mask +from .processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from .data_feed import reader, preprocess_v +from .optimal import postprocess_v, threshold_mask @moduleinfo( @@ -35,22 +36,23 @@ author="paddlepaddle", author_email="", summary="HRNet_w18_samll_v1 is a semantic segmentation model.", - version="1.1.0") -class HRNetw18samllv1humanseg(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_mobile_inference") + version="1.2.0") +class HRNetw18samllv1humanseg: + def __init__(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "humanseg_mobile_inference", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') - self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') - cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -58,10 +60,14 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + + if paddle.get_cudnn_version() == 8004: + gpu_config.delete_pass('conv_elementwise_add_act_fuse_pass') + gpu_config.delete_pass('conv_elementwise_add2_act_fuse_pass') + self.gpu_predictor = create_predictor(gpu_config) def segment(self, images=None, @@ -112,9 +118,16 @@ def segment(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = 
output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + output = output_handle.copy_to_cpu() + output = np.expand_dims(output[:, 1, :, :], axis=1) # postprocess one by one for i in range(len(batch_data)): @@ -152,9 +165,16 @@ def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu height = int(frame_org.shape[1]) disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -217,9 +237,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_mobil ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -244,9 +271,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_mobil ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -268,30 +302,6 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_mobil break cap_video.release() - def save_inference_model(self, - 
dirname='humanseg_mobile_model', - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, - model_filename=model_filename, - params_filename=params_filename, - executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/semantic_segmentation/humanseg_mobile/test.py b/modules/image/semantic_segmentation/humanseg_mobile/test.py new file mode 100644 index 000000000..c38977f77 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_mobile/test.py @@ -0,0 +1,144 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/pg_WCHWSdT8/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyNDM2ODI4&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + img = cv2.imread('tests/test.jpg') + video = cv2.VideoWriter('tests/test.avi', fourcc, + 20.0, tuple(img.shape[:2])) + for i in range(40): + video.write(img) + video.release() + cls.module = hub.Module(name="humanseg_mobile") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('humanseg_mobile_output') + shutil.rmtree('humanseg_mobile_video_result') + + def test_segment1(self): + results = self.module.segment( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment2(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment3(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment4(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment5(self): + self.assertRaises( + AssertionError, + self.module.segment, + paths=['no.jpg'] + ) + + def test_segment6(self): + self.assertRaises( + AttributeError, + self.module.segment, + images=['test.jpg'] + ) + + def test_video_stream_segment1(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, 
np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_stream_segment2(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_segment1(self): + self.module.video_segment( + video_path="tests/test.avi", + use_gpu=False + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 12a6bf9251ebaae97c1ba90660def951a600bec8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:08:48 +0800 Subject: [PATCH 061/117] update yolov3_resnet34_coco2017 (#1953) * update yolov3_resnet34_coco2017 * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../yolov3_resnet34_coco2017/README.md | 16 +- .../yolov3_resnet34_coco2017/README_en.md | 16 +- .../yolov3_resnet34_coco2017/module.py | 192 ++------ .../yolov3_resnet34_coco2017/name_adapter.py | 61 --- .../nonlocal_helper.py | 154 ------ .../yolov3_resnet34_coco2017/processor.py | 7 +- .../yolov3_resnet34_coco2017/resnet.py | 447 ------------------ .../yolov3_resnet34_coco2017/test.py | 108 +++++ .../yolov3_resnet34_coco2017/yolo_head.py | 273 ----------- 9 files changed, 162 insertions(+), 1112 deletions(-) delete mode 100644 modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py delete mode 100644 modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py delete mode 100644 modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py create mode 100644 modules/image/object_detection/yolov3_resnet34_coco2017/test.py delete mode 100644 modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/README.md b/modules/image/object_detection/yolov3_resnet34_coco2017/README.md index bb245f340..2384ba87f 100644 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/README.md +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/README.md @@ -100,19 +100,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
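As in the humanseg module above, `dirname` is now a single path prefix. A minimal loading sketch for the exported files, mirroring what `module.py` in this commit does (paths are illustrative):

```python
from paddle.inference import Config, create_predictor

# './inference/model' is an assumed prefix produced by save_inference_model
config = Config('./inference/model.pdmodel', './inference/model.pdiparams')
config.disable_gpu()  # or config.enable_use_gpu(500, 0) for GPU inference
predictor = create_predictor(config)
```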
## 四、服务部署 @@ -166,6 +160,10 @@ 修复numpy数据读取问题 +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install yolov3_resnet34_coco2017==1.0.2 + $ hub install yolov3_resnet34_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md b/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md index c10a2466f..2e1e6e5f4 100644 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/README_en.md @@ -99,19 +99,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -165,6 +159,10 @@ Fix the problem of reading numpy +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install yolov3_resnet34_coco2017==1.0.2 + $ hub install yolov3_resnet34_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/module.py b/modules/image/object_detection/yolov3_resnet34_coco2017/module.py index 5c26e52ec..2a2b8d595 100644 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/module.py +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/module.py @@ -6,31 +6,30 @@ import os from functools import partial +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.jit +import paddle.static +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from yolov3_resnet34_coco2017.resnet import ResNet -from yolov3_resnet34_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_resnet34_coco2017.data_feed import reader -from yolov3_resnet34_coco2017.yolo_head import MultiClassNMS, YOLOv3Head +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="yolov3_resnet34_coco2017", - version="1.0.2", + version="1.1.0", type="CV/object_detection", summary= "Baidu's YOLOv3 model for object detection with backbone ResNet34, trained with dataset coco2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3ResNet34Coco2017(hub.Module): - def _initialize(self): +class YOLOv3ResNet34Coco2017: + def __init__(self): self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet34_model") + self.directory, "yolov3_resnet34_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -39,11 +38,13 @@ def _set_config(self): """ predictor config setting. 
""" - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -52,108 +53,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = ResNet( - norm_type='bn', - freeze_at=0, - freeze_norm=False, - norm_decay=0., - depth=34, - feature_maps=[3, 4, 5]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, @@ -196,54 +99,33 @@ def object_detection(self, paths = 
paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py b/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdee..000000000 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + 
name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py b/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa0..000000000 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) 
else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git 
a/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py index 2f9a42d9c..aa9a61bd0 100644 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py @@ -101,7 +101,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. @@ -126,9 +126,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py b/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e..000000000 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/test.py b/modules/image/object_detection/yolov3_resnet34_coco2017/test.py new file mode 100644 index 000000000..b84ff35d7 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet34_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_resnet34_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + 
self.assertTrue(1000 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c2..000000000 --- a/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if 
act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - 
Returns: - pred (Variable): The prediction result after non-max suppress. - - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred From 9d700dd6337c51b10c0426990343de26bdceb4e1 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:10:39 +0800 Subject: [PATCH 062/117] update yolov3_resnet50_vd_coco2017 (#1954) * update yolov3_resnet50_vd_coco2017 * update unittest * update gpu config * update * add clean func * update save inference model --- .../yolov3_resnet50_vd_coco2017/README.md | 17 +- .../yolov3_resnet50_vd_coco2017/README_en.md | 16 +- .../yolov3_resnet50_vd_coco2017/module.py | 194 ++------ .../name_adapter.py | 61 --- .../nonlocal_helper.py | 154 ------ .../yolov3_resnet50_vd_coco2017/processor.py | 7 +- .../yolov3_resnet50_vd_coco2017/resnet.py | 447 ------------------ .../yolov3_resnet50_vd_coco2017/test.py | 108 +++++ .../yolov3_resnet50_vd_coco2017/yolo_head.py | 273 ----------- 9 files changed, 161 insertions(+), 1116 deletions(-) delete mode 100644 modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py delete mode 100644 modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py delete mode 100644 modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py create mode 100644 modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py delete mode 100644 modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md index 0ad42e87a..c481bb47c 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README.md @@ -100,20 +100,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 - + - dirname: 模型保存路径
## 四、服务部署 @@ -166,6 +159,10 @@ 修复numpy数据读取问题 +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install yolov3_resnet50_vd_coco2017==1.0.2 + $ hub install yolov3_resnet50_vd_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md index 7bb7b10ae..2f9b46bd8 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/README_en.md @@ -99,19 +99,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: save model path ## IV.Server Deployment @@ -165,6 +159,10 @@ Fix the problem of reading numpy +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install yolov3_resnet50_vd_coco2017==1.0.2 + $ hub install yolov3_resnet50_vd_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py index cdd037d89..7e1101dd8 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/module.py @@ -6,44 +6,43 @@ import os from functools import partial +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from yolov3_resnet50_vd_coco2017.resnet import ResNet -from yolov3_resnet50_vd_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_resnet50_vd_coco2017.data_feed import reader -from yolov3_resnet50_vd_coco2017.yolo_head import MultiClassNMS, YOLOv3Head +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="yolov3_resnet50_vd_coco2017", - version="1.0.2", + version="1.1.0", type="CV/object_detection", summary= "Baidu's YOLOv3 model for object detection with backbone ResNet50, trained with dataset coco2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3ResNet50Coco2017(hub.Module): - def _initialize(self): +class YOLOv3ResNet50Coco2017: + def __init__(self): self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_resnet50_model") + self.directory, "yolov3_resnet50_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() - + def _set_config(self): """ predictor config setting. 
""" - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -52,110 +51,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. - """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = ResNet( - norm_type='sync_bn', - freeze_at=0, - freeze_norm=False, - norm_decay=0., - dcn_v2_stages=[5], - depth=50, - variant='d', - feature_maps=[3, 4, 5]) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, @@ -198,54 +97,33 
@@ def object_detection(self, paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdee..000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 
'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py deleted file mode 100644 index 599b8dfa0..000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,154 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, - max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = 
fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax( - theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape( - t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, 
conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py index 1039e3e48..dd2aea11a 100644 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/processor.py @@ -101,7 +101,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. @@ -126,9 +126,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py deleted file mode 100644 index 4bd6fb61e..000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/resnet.py +++ /dev/null @@ -1,447 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, - input, - filter_size, - stride, - padding, - act=None, - name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None, - dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split( - input=offset_mask, - num_or_sections=[offset_channel, mask_channel], - dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - 
mask=mask, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr( - name=bn_name + '_scale', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - battr = ParamAttr( - name=bn_name + '_offset', - learning_rate=norm_lr, - regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=pattr, - default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], - dtype=conv.dtype, - attr=battr, - default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel( - x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, - pool_size=2, - pool_stride=2, - pool_padding=0, - ceil_mode=True, - pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[ - int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1 - ], [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) 
in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut( - input, - num_filters * expand, - stride, - is_first=is_first, - name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation( - input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add( - x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, - input, - num_filters, - stride, - is_first, - name, - dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self._conv_norm( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self._shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[ - self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, - nonlocal_name + '_{}'.format(i), - int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm( - input=input, - num_filters=c, - filter_size=k, - stride=s, - act='relu', - name=_name) - - output = fluid.layers.pool2d( - input=input, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: 
- res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool = fluid.layers.pool2d( - input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict([('res{}_sum'.format(self.feature_maps[idx]), feat) - for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, - norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py new file mode 100644 index 000000000..c70c92380 --- /dev/null +++ b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_resnet50_vd_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + 
self.assertTrue(1000 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c2..000000000 --- a/modules/image/object_detection/yolov3_resnet50_vd_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + 
'.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." - - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each 
image - - Returns: - pred (Variable): The prediction result after non-max suppress. - - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred From a1c050dff013b693720d2dc074b4358806773b5e Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:12:00 +0800 Subject: [PATCH 063/117] update ssd_mobilenet_v1_pascal (#1955) * update ssd_mobilenet_v1_pascal * update gpu config * update * add clean func * update save inference model --- .../ssd_mobilenet_v1_pascal/README.md | 16 ++- .../ssd_mobilenet_v1_pascal/README_en.md | 16 ++- .../ssd_mobilenet_v1_pascal/data_feed.py | 2 - .../ssd_mobilenet_v1_pascal/module.py | 45 +++----- .../ssd_mobilenet_v1_pascal/test.py | 108 ++++++++++++++++++ 5 files changed, 136 insertions(+), 51 deletions(-) create mode 100644 modules/image/object_detection/ssd_mobilenet_v1_pascal/test.py diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md index 4b3ac1822..ff5b0e231 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md @@ -102,19 +102,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
## 四、服务部署 @@ -172,6 +166,10 @@ 移除 fluid api +* 1.2.0 + + 修复推理模型无法导出的问题 + - ```shell - $ hub install ssd_mobilenet_v1_pascal==1.1.3 + $ hub install ssd_mobilenet_v1_pascal==1.2.0 ``` diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md index 4bad42420..9876bcc03 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/README_en.md @@ -101,19 +101,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -171,6 +165,10 @@ Remove fluid api +* 1.2.0 + + Fix bug of save_inference_model + - ```shell - $ hub install ssd_mobilenet_v1_pascal==1.1.3 + $ hub install ssd_mobilenet_v1_pascal==1.2.0 ``` diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py index 42677536f..6768b03e0 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py @@ -5,12 +5,10 @@ import os import random -from collections import OrderedDict import cv2 import numpy as np from PIL import Image -from paddle import fluid __all__ = ['reader'] diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py index 7460115c3..a926a4402 100644 --- a/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py @@ -8,38 +8,39 @@ import numpy as np import paddle +import paddle.jit +import paddle.static import yaml from paddle.inference import Config from paddle.inference import create_predictor -from ssd_mobilenet_v1_pascal.data_feed import reader -from ssd_mobilenet_v1_pascal.processor import base64_to_cv2 -from ssd_mobilenet_v1_pascal.processor import load_label_info -from ssd_mobilenet_v1_pascal.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import load_label_info +from .processor import postprocess -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @moduleinfo(name="ssd_mobilenet_v1_pascal", - version="1.1.3", + version="1.2.0", type="cv/object_detection", summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class SSDMobileNetv1(hub.Module): - - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "ssd_mobilenet_v1_model") +class SSDMobileNetv1: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "ssd_mobilenet_v1_model", "model") self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self.model_config = None self._set_config() def 
_set_config(self): # predictor config setting. - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) @@ -52,7 +53,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -136,24 +137,6 @@ def object_detection(self, res.extend(output) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/ssd_mobilenet_v1_pascal/test.py b/modules/image/object_detection/ssd_mobilenet_v1_pascal/test.py new file mode 100644 index 000000000..c27307b8a --- /dev/null +++ b/modules/image/object_detection/ssd_mobilenet_v1_pascal/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="ssd_mobilenet_v1_pascal") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(200 < left < 800) + self.assertTrue(2500 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(3500 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + cv2.error, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From f3d7b12ca2ef769e703b39cdfd631b03107cf453 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:13:26 +0800 Subject: [PATCH 064/117] update yolov3_darknet53_pedestrian (#1956) * update yolov3_darknet53_pedestrian * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../yolov3_darknet53_pedestrian/README.md | 16 ++- .../yolov3_darknet53_pedestrian/README_en.md | 16 ++- .../yolov3_darknet53_pedestrian/module.py | 45 +++----- .../yolov3_darknet53_pedestrian/processor.py | 2 +- .../yolov3_darknet53_pedestrian/test.py | 108 ++++++++++++++++++ 5 files changed, 137 insertions(+), 50 deletions(-) create mode 100644 modules/image/object_detection/yolov3_darknet53_pedestrian/test.py diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md index 1cdc13767..c1ba42e79 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md @@ -101,19 +101,13 @@ - save\_path (str, 
optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
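As a rough sketch, an exported prefix like this can be loaded back with `paddle.inference`; this mirrors how the refactored module builds its own predictors, and the prefix path is illustrative:

```python
from paddle.inference import Config, create_predictor

# The export writes `<prefix>.pdmodel` and `<prefix>.pdiparams`; the 1.1.0 module
# constructs its CPU/GPU predictors from exactly this pair of files.
prefix = './inference/model'  # illustrative prefix
config = Config(prefix + '.pdmodel', prefix + '.pdiparams')
config.disable_glog_info()
config.disable_gpu()  # or config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
predictor = create_predictor(config)
```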
## 四、服务部署 @@ -171,6 +165,10 @@ 移除 fluid api +* 1.1.0 + + 修复推理模型无法导出的问题 + - ```shell - $ hub install yolov3_darknet53_pedestrian==1.0.3 + $ hub install yolov3_darknet53_pedestrian==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md index 09d82d391..faaf48e3c 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/README_en.md @@ -100,19 +100,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -170,6 +164,10 @@ Remove fluid api +* 1.1.0 + + Fix bug of save_inference_model + - ```shell - $ hub install yolov3_darknet53_pedestrian==1.0.3 + $ hub install yolov3_darknet53_pedestrian==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py index 5b8a4c842..7d52f1fef 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py @@ -8,30 +8,29 @@ import numpy as np import paddle +import paddle.jit +import paddle.static from paddle.inference import Config from paddle.inference import create_predictor -from yolov3_darknet53_pedestrian.data_feed import reader -from yolov3_darknet53_pedestrian.processor import base64_to_cv2 -from yolov3_darknet53_pedestrian.processor import load_label_info -from yolov3_darknet53_pedestrian.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import load_label_info +from .processor import postprocess -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @moduleinfo(name="yolov3_darknet53_pedestrian", - version="1.0.3", + version="1.1.0", type="CV/object_detection", summary="Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Pedestrian(hub.Module): - - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_pedestrian_model") +class YOLOv3DarkNet53Pedestrian: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_pedestrian_model", "model") self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -39,7 +38,9 @@ def _set_config(self): """ predictor config setting. 
""" - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) @@ -52,7 +53,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -125,24 +126,6 @@ def object_detection(self, res.extend(output) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py index 356ce0342..25390dcf8 100644 --- a/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py @@ -89,7 +89,7 @@ def load_label_info(file_path): def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. diff --git a/modules/image/object_detection/yolov3_darknet53_pedestrian/test.py b/modules/image/object_detection/yolov3_darknet53_pedestrian/test.py new file mode 100644 index 000000000..72a015d8c --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_pedestrian/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/15310014bf794c87a1e3b289d904ecae122aafe8c8fe47fd98634e79a8e4012f' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_darknet53_pedestrian") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('yolov3_pedestrian_detect_output') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'pedestrian') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'pedestrian') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'pedestrian') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 7a847a39b1da6e6867031f52f713d92391b9729d Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:13:47 +0800 Subject: [PATCH 065/117] update yolov3_darknet53_vehicles (#1957) * update yolov3_darknet53_vehicles * update gpu config * update * add clean func * update save inference model --- .../yolov3_darknet53_vehicles/README.md | 16 ++- .../yolov3_darknet53_vehicles/README_en.md | 16 ++- .../yolov3_darknet53_vehicles/module.py | 49 +++----- .../yolov3_darknet53_vehicles/processor.py | 2 +- .../yolov3_darknet53_vehicles/test.py | 108 ++++++++++++++++++ 5 files changed, 139 insertions(+), 52 deletions(-) create mode 100644 modules/image/object_detection/yolov3_darknet53_vehicles/test.py diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md index fdf4569de..9c42eef96 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/README.md +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README.md @@ -100,19 +100,13 @@ - 
save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
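A short usage sketch, following the pattern of the accompanying test.py; the image path is only illustrative:

```python
import paddlehub as hub

module = hub.Module(name="yolov3_darknet53_vehicles")

# Each result holds its bounding boxes under the 'data' key, as asserted in test.py.
results = module.object_detection(paths=['tests/test.jpg'])  # illustrative path
for bbox in results[0]['data']:
    print(bbox['label'], bbox['confidence'],
          bbox['left'], bbox['right'], bbox['top'], bbox['bottom'])
```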
## 四、服务部署 @@ -170,6 +164,10 @@ 移除 fluid api +* 1.1.0 + + 修复推理模型无法导出的问题 + - ```shell - $ hub install yolov3_darknet53_vehicles==1.0.3 + $ hub install yolov3_darknet53_vehicles==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md index 59cb62134..d014fc3ad 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/README_en.md @@ -100,19 +100,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -170,6 +164,10 @@ Remove fluid api +* 1.1.0 + + Fix bug of save_inference_model + - ```shell - $ hub install yolov3_darknet53_vehicles==1.0.3 + $ hub install yolov3_darknet53_vehicles==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py index b4586de5a..05ae70855 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py @@ -8,30 +8,29 @@ import numpy as np import paddle +import paddle.jit +import paddle.static from paddle.inference import Config from paddle.inference import create_predictor -from yolov3_darknet53_vehicles.data_feed import reader -from yolov3_darknet53_vehicles.processor import base64_to_cv2 -from yolov3_darknet53_vehicles.processor import load_label_info -from yolov3_darknet53_vehicles.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import load_label_info +from .processor import postprocess -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @moduleinfo(name="yolov3_darknet53_vehicles", - version="1.0.3", + version="1.1.0", type="CV/object_detection", summary="Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3DarkNet53Vehicles(hub.Module): - - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_vehicles_model") +class YOLOv3DarkNet53Vehicles: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_vehicles_model", "model") self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -49,7 +48,9 @@ def _set_config(self): """ # create default cpu predictor - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -60,7 +61,7 @@ def _set_config(self): npu_id = self._get_device_id("FLAGS_selected_npus") if 
npu_id != -1: # use npu - npu_config = Config(self.default_pretrained_model_path) + npu_config = Config(model, params) npu_config.disable_glog_info() npu_config.enable_npu(device_id=npu_id) self.npu_predictor = create_predictor(npu_config) @@ -69,7 +70,7 @@ def _set_config(self): gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") if gpu_id != -1: # use gpu - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) self.gpu_predictor = create_predictor(gpu_config) @@ -78,7 +79,7 @@ def _set_config(self): xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") if xpu_id != -1: # use xpu - xpu_config = Config(self.default_pretrained_model_path) + xpu_config = Config(model, params) xpu_config.disable_glog_info() xpu_config.enable_xpu(100) self.xpu_predictor = create_predictor(xpu_config) @@ -169,24 +170,6 @@ def object_detection(self, res.extend(output) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py index 5aa464e6b..95e12def7 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py @@ -88,7 +88,7 @@ def load_label_info(file_path): def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/test.py b/modules/image/object_detection/yolov3_darknet53_vehicles/test.py new file mode 100644 index 000000000..6ab7c6e42 --- /dev/null +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/036990d3d8654d789c2138492155d9dd95dba2a2fc8e410ab059eea42b330f59' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_darknet53_vehicles") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('yolov3_vehicles_detect_output') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'car') + self.assertTrue(confidence > 0.5) + self.assertTrue(2000 < left < 4000) + self.assertTrue(4000 < right < 6000) + self.assertTrue(1000 < top < 3000) + self.assertTrue(2000 < bottom < 5000) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'car') + self.assertTrue(confidence > 0.5) + self.assertTrue(2000 < left < 4000) + self.assertTrue(4000 < right < 6000) + self.assertTrue(1000 < top < 3000) + self.assertTrue(2000 < bottom < 5000) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'car') + self.assertTrue(confidence > 0.5) + self.assertTrue(2000 < left < 4000) + self.assertTrue(4000 < right < 6000) + self.assertTrue(1000 < top < 3000) + self.assertTrue(2000 < bottom < 5000) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From ce4efe174556048176adf55c5006883a0f25b7e8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:14:10 +0800 Subject: [PATCH 066/117] update ultra_light_fast_generic_face_detector_1mb_640 (#1964) * update ultra_light_fast_generic_face_detector_1mb * add clean func * update save inference model --- .../README.md | 17 ++- .../README_en.md | 16 +-- .../module.py | 41 ++---- .../test.py | 133 ++++++++++++++++++ 4 files changed, 160 insertions(+), 47 deletions(-) create mode 100644 modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/test.py diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md index b7b36aa78..b015f9e77 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README.md @@ -102,19 +102,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, 
- params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
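A brief sketch of the detection call, matching what the new test.py asserts on (the image path is illustrative):

```python
import paddlehub as hub

module = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640")

# Face boxes carry a confidence score plus pixel coordinates.
results = module.face_detection(paths=['tests/test.jpg'],  # illustrative path
                                use_gpu=False,
                                visualization=False)
for face in results[0]['data']:
    print(face['confidence'], face['left'], face['right'], face['top'], face['bottom'])
```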
## 四、服务部署 @@ -168,6 +162,11 @@ 移除 fluid api +* 1.2.0 + + 修复无法导出推理模型的问题 + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.3 + $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.2.0 ``` + diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md index e200bb397..473715081 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/README_en.md @@ -101,19 +101,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -167,6 +161,10 @@ Remove fluid api +* 1.2.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.1.3 + $ hub install ultra_light_fast_generic_face_detector_1mb_640==1.2.0 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py index 6caa32ace..ceebbfb30 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/module.py @@ -8,13 +8,13 @@ import numpy as np import paddle +import paddle.static from paddle.inference import Config from paddle.inference import create_predictor -from ultra_light_fast_generic_face_detector_1mb_640.data_feed import reader -from ultra_light_fast_generic_face_detector_1mb_640.processor import base64_to_cv2 -from ultra_light_fast_generic_face_detector_1mb_640.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess -import paddlehub as hub from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -27,19 +27,20 @@ author_email="paddle-dev@baidu.com", summary= "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance object detection model release on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.", - version="1.1.3") -class FaceDetector640(hub.Module): - - def _initialize(self): + version="1.2.0") +class FaceDetector640: + def __init__(self): self.default_pretrained_model_path = os.path.join(self.directory, - "ultra_light_fast_generic_face_detector_1mb_640") + "ultra_light_fast_generic_face_detector_1mb_640", "model") self._set_config() def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -51,29 +52,11 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) 
gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - def face_detection(self, images=None, paths=None, diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/test.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/test.py new file mode 100644 index 000000000..ecf7365fa --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_640/test.py @@ -0,0 +1,133 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('face_detector_640_predict_output') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < 
bottom < 2000) + + def test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + AttributeError, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 02674b5db2e13aa1ba563d3ba9f660c27cb3c263 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:14:34 +0800 Subject: [PATCH 067/117] update ultra_light_fast_generic_face_detector_1mb_320 (#1965) * update ultra_light_fast_generic_face_detector_1mb * add clean func * update save inference model --- .../README.md | 16 +-- .../README_en.md | 16 +-- .../module.py | 40 ++---- .../test.py | 133 ++++++++++++++++++ 4 files changed, 158 insertions(+), 47 deletions(-) create mode 100644 modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/test.py diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md index bb44d9c6a..c2ff5a40a 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README.md @@ -102,19 +102,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
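For callers migrating from the 1.1.x interface, a sketch of the change; the removed keyword arguments are the ones listed above, and the output prefix is illustrative:

```python
import paddlehub as hub

module = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320")

# Before (<= 1.1.3) the exporter accepted separate file names and a `combined` flag:
#   module.save_inference_model('./inference', model_filename='__model__',
#                               params_filename='__params__', combined=True)
# From 1.2.0 only an output prefix is passed; file names are derived from it.
module.save_inference_model('./inference/model')  # illustrative prefix
```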
## 四、服务部署 @@ -168,6 +162,10 @@ 移除 fluid api +* 1.2.0 + + 修复无法导出推理模型的问题 + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.3 + $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.2.0 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md index 2bad14e23..cadcdc2bb 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/README_en.md @@ -101,19 +101,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -167,6 +161,10 @@ Remove fluid api +* 1.2.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.1.3 + $ hub install ultra_light_fast_generic_face_detector_1mb_320==1.2.0 ``` diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py index 6744e3571..8e5340077 100644 --- a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/module.py @@ -10,11 +10,10 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from ultra_light_fast_generic_face_detector_1mb_320.data_feed import reader -from ultra_light_fast_generic_face_detector_1mb_320.processor import base64_to_cv2 -from ultra_light_fast_generic_face_detector_1mb_320.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess -import paddlehub as hub from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -27,19 +26,20 @@ author_email="paddle-dev@baidu.com", summary= "Ultra-Light-Fast-Generic-Face-Detector-1MB is a high-performance object detection model release on https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB.", - version="1.1.3") -class FaceDetector320(hub.Module): - - def _initialize(self): + version="1.2.0") +class FaceDetector320: + def __init__(self): self.default_pretrained_model_path = os.path.join(self.directory, - "ultra_light_fast_generic_face_detector_1mb_320") + "ultra_light_fast_generic_face_detector_1mb_320", "model") self._set_config() def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -51,29 +51,11 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() 
gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - def face_detection(self, images=None, paths=None, diff --git a/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/test.py b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/test.py new file mode 100644 index 000000000..a180acfee --- /dev/null +++ b/modules/image/face_detection/ultra_light_fast_generic_face_detector_1mb_320/test.py @@ -0,0 +1,133 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_320") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('face_detector_320_predict_output') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def 
test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + AttributeError, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 931f1e7c7e60b027e641bcf897132341a6608b90 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:15:00 +0800 Subject: [PATCH 068/117] update face_landmark_localization (#1966) * update face_landmark_localization * fix typo * update * add clean func * update save inference model * update save inference model Co-authored-by: chenjian --- .../face_landmark_localization/README.md | 19 ++-- .../face_landmark_localization/data_feed.py | 2 - .../face_landmark_localization/module.py | 50 +++------- .../face_landmark_localization/processor.py | 1 - .../face_landmark_localization/test.py | 91 +++++++++++++++++++ 5 files changed, 112 insertions(+), 51 deletions(-) create mode 100644 modules/image/keypoint_detection/face_landmark_localization/test.py diff --git a/modules/image/keypoint_detection/face_landmark_localization/README.md b/modules/image/keypoint_detection/face_landmark_localization/README.md index c24205000..6fbd197c2 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/README.md +++ b/modules/image/keypoint_detection/face_landmark_localization/README.md @@ -120,18 +120,11 @@ - 当前模型使用的人脸检测模型。 - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=False): + def save_inference_model(dirname): ``` - - - 将模型保存到指定路径,由于人脸关键点检测模型由人脸检测+关键点检测两个模型组成,因此保存后会存在两个子目录,其中`face_landmark`为人脸关键点模型,`detector`为人脸检测模型。 + - **参数** - - dirname: 存在模型的目录名称 - - model_filename: 模型文件名称,默认为\__model__ - - params_filename: 参数文件名称,默认为\__params__(仅当combined为True时生效) - - combined: 是否将参数保存到统一的一个文件中 + - dirname: 模型保存路径 ## 四、服务部署 @@ -187,6 +180,10 @@ 移除 fluid api +* 1.1.0 + + 修复无法导出推理模型的问题 + * ```shell - $ hub install face_landmark_localization==1.0.3 + $ hub install face_landmark_localization==1.1.0 ``` diff --git a/modules/image/keypoint_detection/face_landmark_localization/data_feed.py b/modules/image/keypoint_detection/face_landmark_localization/data_feed.py index c270a20b6..beae75644 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/data_feed.py +++ b/modules/image/keypoint_detection/face_landmark_localization/data_feed.py @@ -1,11 +1,9 @@ # coding=utf-8 import os -import time from collections import OrderedDict import cv2 import numpy as np -from PIL import Image __all__ = ['reader'] diff --git a/modules/image/keypoint_detection/face_landmark_localization/module.py b/modules/image/keypoint_detection/face_landmark_localization/module.py index 31d635235..254899a5b 100644 --- 
a/modules/image/keypoint_detection/face_landmark_localization/module.py +++ b/modules/image/keypoint_detection/face_landmark_localization/module.py @@ -5,15 +5,14 @@ import argparse import ast import os -import time -from collections import OrderedDict -import cv2 import numpy as np import paddle -from face_landmark_localization.data_feed import reader -from face_landmark_localization.processor import base64_to_cv2 -from face_landmark_localization.processor import postprocess +import paddle.jit +import paddle.static +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess from paddle.inference import Config from paddle.inference import create_predictor @@ -30,15 +29,14 @@ author_email="paddle-dev@baidu.com", summary= "Face_Landmark_Localization can be used to locate face landmark. This Module is trained through the MPII Human Pose dataset.", - version="1.0.3") -class FaceLandmarkLocalization(hub.Module): - - def _initialize(self, face_detector_module=None): + version="1.1.0") +class FaceLandmarkLocalization: + def __init__(self, face_detector_module=None): """ Args: face_detector_module (class): module to detect face. """ - self.default_pretrained_model_path = os.path.join(self.directory, "face_landmark_localization") + self.default_pretrained_model_path = os.path.join(self.directory, "face_landmark_localization", "model") if face_detector_module is None: self.face_detector = hub.Module(name="ultra_light_fast_generic_face_detector_1mb_640") else: @@ -49,7 +47,9 @@ def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -61,7 +61,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -78,30 +78,6 @@ def set_face_detector_module(self, face_detector_module): def get_face_detector_module(self): return self.face_detector - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - face_landmark_dir = os.path.join(dirname, "face_landmark") - detector_dir = os.path.join(dirname, "detector") - - paddle.static.save_inference_model(dirname=face_landmark_dir, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - self.face_detector.save_inference_model(dirname=detector_dir, - model_filename=model_filename, - params_filename=params_filename, - combined=combined) - def keypoint_detection(self, images=None, paths=None, diff --git a/modules/image/keypoint_detection/face_landmark_localization/processor.py 
b/modules/image/keypoint_detection/face_landmark_localization/processor.py index 2ac325479..23ea16b96 100644 --- a/modules/image/keypoint_detection/face_landmark_localization/processor.py +++ b/modules/image/keypoint_detection/face_landmark_localization/processor.py @@ -6,7 +6,6 @@ import base64 import os import time -from collections import OrderedDict import cv2 import numpy as np diff --git a/modules/image/keypoint_detection/face_landmark_localization/test.py b/modules/image/keypoint_detection/face_landmark_localization/test.py new file mode 100644 index 000000000..7afb41fd7 --- /dev/null +++ b/modules/image/keypoint_detection/face_landmark_localization/test.py @@ -0,0 +1,91 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="face_landmark_localization") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('face_landmark_output') + + def test_keypoint_detection1(self): + results = self.module.keypoint_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + kps = results[0]['data'][0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection2(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + kps = results[0]['data'][0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection3(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + kps = results[0]['data'][0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection4(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + kps = results[0]['data'][0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection5(self): + self.assertRaises( + AssertionError, + self.module.keypoint_detection, + paths=['no.jpg'] + ) + + def test_keypoint_detection6(self): + self.assertRaises( + AttributeError, + self.module.keypoint_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/model.pdiparams')) + + self.assertTrue(os.path.exists('./inference/model/face_detector.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/face_detector.pdiparams')) + +if __name__ == "__main__": + unittest.main() From fa3c3c93785867322ff21862107699c6378c1047 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:15:27 +0800 Subject: [PATCH 069/117] update openpose_body_estimation (#1969) * update openpose_body_estimation * add clean func --- .../openpose_body_estimation/module.py | 5 +- .../openpose_body_estimation/processor.py | 3 +- .../openpose_body_estimation/readme.md | 6 +- .../openpose_body_estimation/test.py | 
77 +++++++++++++++++++ 4 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 modules/image/keypoint_detection/openpose_body_estimation/test.py diff --git a/modules/image/keypoint_detection/openpose_body_estimation/module.py b/modules/image/keypoint_detection/openpose_body_estimation/module.py index bccd3c810..248999312 100644 --- a/modules/image/keypoint_detection/openpose_body_estimation/module.py +++ b/modules/image/keypoint_detection/openpose_body_estimation/module.py @@ -15,7 +15,6 @@ import os import time import copy -import base64 import argparse from typing import Union from collections import OrderedDict @@ -26,7 +25,7 @@ import numpy as np from paddlehub.module.module import moduleinfo, runnable, serving import paddlehub.vision.transforms as T -import openpose_body_estimation.processor as P +from . import processor as P @moduleinfo( @@ -36,7 +35,7 @@ author_email="", summary="Openpose_body_estimation is a body pose estimation model based on Realtime Multi-Person 2D Pose \ Estimation using Part Affinity Fields.", - version="1.0.0") + version="1.1.0") class BodyPoseModel(nn.Layer): """ BodyposeModel diff --git a/modules/image/keypoint_detection/openpose_body_estimation/processor.py b/modules/image/keypoint_detection/openpose_body_estimation/processor.py index aa17240e5..ba8ef998b 100644 --- a/modules/image/keypoint_detection/openpose_body_estimation/processor.py +++ b/modules/image/keypoint_detection/openpose_body_estimation/processor.py @@ -1,11 +1,10 @@ -import os import base64 import math from typing import Callable import cv2 import numpy as np -from scipy.ndimage.filters import gaussian_filter +from scipy.ndimage import gaussian_filter class PadDownRight: diff --git a/modules/image/keypoint_detection/openpose_body_estimation/readme.md b/modules/image/keypoint_detection/openpose_body_estimation/readme.md index 1ea0b295a..b64569310 100644 --- a/modules/image/keypoint_detection/openpose_body_estimation/readme.md +++ b/modules/image/keypoint_detection/openpose_body_estimation/readme.md @@ -153,8 +153,10 @@ * 1.0.0 初始发布 - + +* 1.1.0 + * ```shell - $ hub install openpose_body_estimation==1.0.0 + $ hub install openpose_body_estimation==1.1.0 ``` diff --git a/modules/image/keypoint_detection/openpose_body_estimation/test.py b/modules/image/keypoint_detection/openpose_body_estimation/test.py new file mode 100644 index 000000000..7b4c13f6e --- /dev/null +++ b/modules/image/keypoint_detection/openpose_body_estimation/test.py @@ -0,0 +1,77 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="openpose_body_estimation") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('openpose_body') + + def test_predict1(self): + results = self.module.predict( + img='tests/test.jpg', + visualization=False + ) + kps = results['candidate'].tolist() + self.assertIsInstance(kps, list) + + def test_predict2(self): + results = self.module.predict( + img=cv2.imread('tests/test.jpg'), + visualization=False + ) + kps = results['candidate'].tolist() + self.assertIsInstance(kps, list) + + def test_predict3(self): + results = self.module.predict( + img=cv2.imread('tests/test.jpg'), + visualization=True + ) + kps = results['candidate'].tolist() + self.assertIsInstance(kps, list) + + def test_predict4(self): + self.assertRaises( + AttributeError, + self.module.predict, + img='no.jpg' + ) + + def test_predict5(self): + self.assertRaises( + AttributeError, + self.module.predict, + img=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model/openpose_body_estimation.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/openpose_body_estimation.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From a223025d35a19de9710651bf44ea6aa32d880b78 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:16:11 +0800 Subject: [PATCH 070/117] update yolov3_mobilenet_v1_coco2017 (#1952) * update yolov3_mobilenet_v1_coco2017 * update gpu config * update * add clean func * update save inference model Co-authored-by: chenjian --- .../yolov3_mobilenet_v1_coco2017/README.md | 16 +- .../yolov3_mobilenet_v1_coco2017/README_en.md | 16 +- .../mobilenet_v1.py | 194 ------------- .../yolov3_mobilenet_v1_coco2017/module.py | 189 +++--------- .../yolov3_mobilenet_v1_coco2017/processor.py | 7 +- .../yolov3_mobilenet_v1_coco2017/test.py | 108 +++++++ .../yolov3_mobilenet_v1_coco2017/yolo_head.py | 273 ------------------ 7 files changed, 161 insertions(+), 642 deletions(-) delete mode 100644 modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py create mode 100644 modules/image/object_detection/yolov3_mobilenet_v1_coco2017/test.py delete mode 100644 modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md index 456de66ba..2e5032d0b 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md @@ -101,19 +101,13 @@ - save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在) - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
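With the simplified signature documented above, exporting now takes a single path prefix. The test added in this patch exercises it roughly as in the following sketch; the output prefix is illustrative.

```python
import paddlehub as hub

# Load the updated module and export its inference model with the new one-argument API.
module = hub.Module(name="yolov3_mobilenet_v1_coco2017")
module.save_inference_model('./inference/model')
# Per the test in this patch, this writes ./inference/model.pdmodel and ./inference/model.pdiparams.
```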
## 四、服务部署 @@ -167,6 +161,10 @@ 修复numpy数据读取问题 +* 1.1.0 + + 移除 fluid api + - ```shell - $ hub install yolov3_mobilenet_v1_coco2017==1.0.2 + $ hub install yolov3_mobilenet_v1_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md index f80472bfa..08ecd92a9 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README_en.md @@ -100,19 +100,13 @@ - save\_path (str, optional): output path for saving results - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -166,6 +160,10 @@ Fix the problem of reading numpy +* 1.1.0 + + Remove fluid api + - ```shell - $ hub install yolov3_mobilenet_v1_coco2017==1.0.2 + $ hub install yolov3_mobilenet_v1_coco2017==1.1.0 ``` diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py deleted file mode 100644 index 05f64c938..000000000 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/mobilenet_v1.py +++ /dev/null @@ -1,194 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MobileNet'] - - -class MobileNet(object): - """ - MobileNet v1, see https://arxiv.org/abs/1704.04861 - - Args: - norm_type (str): normalization type, 'bn' and 'sync_bn' are supported - norm_decay (float): weight decay for normalization layer weights - conv_group_scale (int): scaling factor for convolution groups - with_extra_blocks (bool): if extra blocks should be added - extra_block_filters (list): number of filter for each extra block - """ - __shared__ = ['norm_type', 'weight_prefix_name'] - - def __init__(self, - norm_type='bn', - norm_decay=0., - conv_group_scale=1, - conv_learning_rate=1.0, - with_extra_blocks=False, - extra_block_filters=[[256, 512], [128, 256], [128, 256], - [64, 128]], - weight_prefix_name=''): - self.norm_type = norm_type - self.norm_decay = norm_decay - self.conv_group_scale = conv_group_scale - self.conv_learning_rate = conv_learning_rate - self.with_extra_blocks = with_extra_blocks - self.extra_block_filters = extra_block_filters - self.prefix_name = weight_prefix_name - - def _conv_norm(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): - parameter_attr = ParamAttr( - learning_rate=self.conv_learning_rate, - initializer=fluid.initializer.MSRA(), - name=name + "_weights") - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=parameter_attr, - bias_attr=False) - - bn_name = name + "_bn" - norm_decay = self.norm_decay - bn_param_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + 
'_scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(norm_decay), name=bn_name + '_offset') - return fluid.layers.batch_norm( - input=conv, - act=act, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def depthwise_separable(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): - depthwise_conv = self._conv_norm( - input=input, - filter_size=3, - num_filters=int(num_filters1 * scale), - stride=stride, - padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False, - name=name + "_dw") - - pointwise_conv = self._conv_norm( - input=depthwise_conv, - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0, - name=name + "_sep") - return pointwise_conv - - def _extra_block(self, - input, - num_filters1, - num_filters2, - num_groups, - stride, - name=None): - pointwise_conv = self._conv_norm( - input=input, - filter_size=1, - num_filters=int(num_filters1), - stride=1, - num_groups=int(num_groups), - padding=0, - name=name + "_extra1") - normal_conv = self._conv_norm( - input=pointwise_conv, - filter_size=3, - num_filters=int(num_filters2), - stride=2, - num_groups=int(num_groups), - padding=1, - name=name + "_extra2") - return normal_conv - - def __call__(self, input): - scale = self.conv_group_scale - - blocks = [] - # input 1/1 - out = self._conv_norm( - input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1") - # 1/2 - out = self.depthwise_separable( - out, 32, 64, 32, 1, scale, name=self.prefix_name + "conv2_1") - out = self.depthwise_separable( - out, 64, 128, 64, 2, scale, name=self.prefix_name + "conv2_2") - # 1/4 - out = self.depthwise_separable( - out, 128, 128, 128, 1, scale, name=self.prefix_name + "conv3_1") - out = self.depthwise_separable( - out, 128, 256, 128, 2, scale, name=self.prefix_name + "conv3_2") - # 1/8 - blocks.append(out) - out = self.depthwise_separable( - out, 256, 256, 256, 1, scale, name=self.prefix_name + "conv4_1") - out = self.depthwise_separable( - out, 256, 512, 256, 2, scale, name=self.prefix_name + "conv4_2") - # 1/16 - blocks.append(out) - for i in range(5): - out = self.depthwise_separable( - out, - 512, - 512, - 512, - 1, - scale, - name=self.prefix_name + "conv5_" + str(i + 1)) - module11 = out - - out = self.depthwise_separable( - out, 512, 1024, 512, 2, scale, name=self.prefix_name + "conv5_6") - # 1/32 - out = self.depthwise_separable( - out, 1024, 1024, 1024, 1, scale, name=self.prefix_name + "conv6") - module13 = out - blocks.append(out) - if not self.with_extra_blocks: - return blocks - - num_filters = self.extra_block_filters - module14 = self._extra_block(module13, num_filters[0][0], - num_filters[0][1], 1, 2, - self.prefix_name + "conv7_1") - module15 = self._extra_block(module14, num_filters[1][0], - num_filters[1][1], 1, 2, - self.prefix_name + "conv7_2") - module16 = self._extra_block(module15, num_filters[2][0], - num_filters[2][1], 1, 2, - self.prefix_name + "conv7_3") - module17 = self._extra_block(module16, num_filters[3][0], - num_filters[3][1], 1, 2, - self.prefix_name + "conv7_4") - return module11, module13, module14, module15, module16, module17 diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py index 98e1110a0..0a642907e 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py +++ 
b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py @@ -6,31 +6,29 @@ import os from functools import partial +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from yolov3_mobilenet_v1_coco2017.mobilenet_v1 import MobileNet -from yolov3_mobilenet_v1_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from yolov3_mobilenet_v1_coco2017.data_feed import reader -from yolov3_mobilenet_v1_coco2017.yolo_head import MultiClassNMS, YOLOv3Head +from .processor import load_label_info, postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( name="yolov3_mobilenet_v1_coco2017", - version="1.0.2", + version="1.1.0", type="CV/object_detection", summary= "Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017.", author="paddlepaddle", author_email="paddle-dev@baidu.com") -class YOLOv3MobileNetV1Coco2017(hub.Module): - def _initialize(self): +class YOLOv3MobileNetV1Coco2017: + def __init__(self): self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_mobilenet_v1_model") + self.directory, "yolov3_mobilenet_v1_model", "model") self.label_names = load_label_info( os.path.join(self.directory, "label_file.txt")) self._set_config() @@ -39,11 +37,13 @@ def _set_config(self): """ predictor config setting. """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -52,106 +52,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True, get_prediction=False): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - get_prediction (bool): whether to get prediction. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') - # backbone - backbone = MobileNet( - norm_type='sync_bn', - norm_decay=0., - conv_group_scale=1, - with_extra_blocks=False) - # body_feats - body_feats = backbone(image) - # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') - # yolo_head - yolo_head = YOLOv3Head(num_classes=80) - # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - - # var_prefix - var_prefix = '@HUB_{}@'.format(self.name) - # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } - # name of outputs - if get_prediction: - bbox_out = yolo_head.get_prediction(head_features, im_size) - outputs = {'bbox_out': [var_prefix + bbox_out.name]} - else: - outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': - [var_prefix + var.name for var in body_features] - } - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(startup_program, var_prefix) - # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } - # outputs - outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] - for key, value in outputs.items() - } - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - # pretrained - if pretrained: - - def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) - else: - exe.run(startup_program) - - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def object_detection(self, paths=None, @@ -194,54 +98,33 @@ def object_detection(self, paths = paths if paths else list() data_reader = partial(reader, paths, images) - batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) + batch_reader = paddle.batch(data_reader, batch_size=batch_size) res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=iter_id * batch_size, - visualization=visualization) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 0]))) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(np.array(list(feed_data[:, 1]))) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = postprocess(paths=paths, + 
images=images, + data_out=output_handle, + score_thresh=score_thresh, + label_names=self.label_names, + output_dir=output_dir, + handle_id=iter_id * batch_size, + visualization=visualization) res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py index 2f9a42d9c..aa9a61bd0 100644 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py @@ -101,7 +101,7 @@ def postprocess(paths, handle_id, visualization=True): """ - postprocess the lod_tensor produced by fluid.Executor.run + postprocess the lod_tensor produced by Executor.run Args: paths (list[str]): The paths of images. @@ -126,9 +126,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + lod = data_out.lod()[0] + results = data_out.copy_to_cpu() check_dir(output_dir) diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/test.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/test.py new file mode 100644 index 000000000..ed99b6289 --- /dev/null +++ b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/test.py @@ -0,0 +1,108 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
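The `module.py` rewrite above replaces the removed `AnalysisConfig`/`PaddleTensor` objects with the `paddle.inference` handle-based flow, and the same pattern recurs in the other modules of this patch series. A minimal self-contained sketch of that flow, assuming a `model.pdmodel`/`model.pdiparams` pair and an already preprocessed float32 batch, looks like this:

```python
import numpy as np
from paddle.inference import Config, create_predictor

# Illustrative paths; the real modules derive them from default_pretrained_model_path.
config = Config('model.pdmodel', 'model.pdiparams')
config.disable_glog_info()
config.disable_gpu()  # or config.enable_use_gpu(500, 0) when CUDA is available

predictor = create_predictor(config)

def infer(batch: np.ndarray) -> np.ndarray:
    # Copy the input in, run the engine, copy the output out, as object_detection() does above.
    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
    input_handle.copy_from_cpu(batch.astype('float32'))
    predictor.run()
    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    return output_handle.copy_to_cpu()
```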
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="yolov3_mobilenet_v1_coco2017") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_object_detection1(self): + results = self.module.object_detection( + paths=['tests/test.jpg'] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection2(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')] + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection3(self): + results = self.module.object_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + bbox = results[0]['data'][0] + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'cat') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 1000) + self.assertTrue(1000 < right < 3500) + self.assertTrue(500 < top < 1500) + self.assertTrue(1000 < bottom < 4500) + + def test_object_detection4(self): + self.assertRaises( + AssertionError, + self.module.object_detection, + paths=['no.jpg'] + ) + + def test_object_detection5(self): + self.assertRaises( + AttributeError, + self.module.object_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py b/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py deleted file mode 100644 index 7428fb4c2..000000000 --- a/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py +++ /dev/null @@ -1,273 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.regularizer import L2Decay - -__all__ = ['MultiClassNMS', 'YOLOv3Head'] - - -class MultiClassNMS(object): - # __op__ = fluid.layers.multiclass_nms - def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, - normalized, score_threshold): - super(MultiClassNMS, self).__init__() - self.background_label = background_label - self.keep_top_k = keep_top_k - self.nms_threshold = nms_threshold - self.nms_top_k = nms_top_k - self.normalized = normalized - self.score_threshold = score_threshold - - -class YOLOv3Head(object): - """Head 
block for YOLOv3 network - - Args: - norm_decay (float): weight decay for normalization layer weights - num_classes (int): number of output classes - ignore_thresh (float): threshold to ignore confidence loss - label_smooth (bool): whether to use label smoothing - anchors (list): anchors - anchor_masks (list): anchor masks - nms (object): an instance of `MultiClassNMS` - """ - - def __init__(self, - norm_decay=0., - num_classes=80, - ignore_thresh=0.7, - label_smooth=True, - anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], - [59, 119], [116, 90], [156, 198], [373, 326]], - anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - nms=MultiClassNMS( - background_label=-1, - keep_top_k=100, - nms_threshold=0.45, - nms_top_k=1000, - normalized=True, - score_threshold=0.01), - weight_prefix_name=''): - self.norm_decay = norm_decay - self.num_classes = num_classes - self.ignore_thresh = ignore_thresh - self.label_smooth = label_smooth - self.anchor_masks = anchor_masks - self._parse_anchors(anchors) - self.nms = nms - self.prefix_name = weight_prefix_name - - def _conv_bn(self, - input, - ch_out, - filter_size, - stride, - padding, - act='leaky', - is_test=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=ch_out, - filter_size=filter_size, - stride=stride, - padding=padding, - act=None, - param_attr=ParamAttr(name=name + ".conv.weights"), - bias_attr=False) - - bn_name = name + ".bn" - bn_param_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale') - bn_bias_attr = ParamAttr( - regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset') - out = fluid.layers.batch_norm( - input=conv, - act=None, - is_test=is_test, - param_attr=bn_param_attr, - bias_attr=bn_bias_attr, - moving_mean_name=bn_name + '.mean', - moving_variance_name=bn_name + '.var') - - if act == 'leaky': - out = fluid.layers.leaky_relu(x=out, alpha=0.1) - return out - - def _detection_block(self, input, channel, is_test=True, name=None): - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2 in detection block {}" \ - .format(channel, name) - - conv = input - for j in range(2): - conv = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.{}.0'.format(name, j)) - conv = self._conv_bn( - conv, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.{}.1'.format(name, j)) - route = self._conv_bn( - conv, - channel, - filter_size=1, - stride=1, - padding=0, - is_test=is_test, - name='{}.2'.format(name)) - tip = self._conv_bn( - route, - channel * 2, - filter_size=3, - stride=1, - padding=1, - is_test=is_test, - name='{}.tip'.format(name)) - return route, tip - - def _upsample(self, input, scale=2, name=None): - out = fluid.layers.resize_nearest( - input=input, scale=float(scale), name=name) - return out - - def _parse_anchors(self, anchors): - """ - Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors - - """ - self.anchors = [] - self.mask_anchors = [] - - assert len(anchors) > 0, "ANCHORS not set." - assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set." 
- - for anchor in anchors: - assert len(anchor) == 2, "anchor {} len should be 2".format(anchor) - self.anchors.extend(anchor) - - anchor_num = len(anchors) - for masks in self.anchor_masks: - self.mask_anchors.append([]) - for mask in masks: - assert mask < anchor_num, "anchor mask index overflow" - self.mask_anchors[-1].extend(anchors[mask]) - - def _get_outputs(self, input, is_train=True): - """ - Get YOLOv3 head output - - Args: - input (list): List of Variables, output of backbone stages - is_train (bool): whether in train or test mode - - Returns: - outputs (list): Variables of each output layer - """ - - outputs = [] - - # get last out_layer_num blocks in reverse order - out_layer_num = len(self.anchor_masks) - if isinstance(input, OrderedDict): - blocks = list(input.values())[-1:-out_layer_num - 1:-1] - else: - blocks = input[-1:-out_layer_num - 1:-1] - route = None - for i, block in enumerate(blocks): - if i > 0: # perform concat in first 2 detection_block - block = fluid.layers.concat(input=[route, block], axis=1) - route, tip = self._detection_block( - block, - channel=512 // (2**i), - is_test=(not is_train), - name=self.prefix_name + "yolo_block.{}".format(i)) - - # out channel number = mask_num * (5 + class_num) - num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) - block_out = fluid.layers.conv2d( - input=tip, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(name=self.prefix_name + - "yolo_output.{}.conv.weights".format(i)), - bias_attr=ParamAttr( - regularizer=L2Decay(0.), - name=self.prefix_name + - "yolo_output.{}.conv.bias".format(i))) - outputs.append(block_out) - - if i < len(blocks) - 1: - # do not perform upsample in the last detection_block - route = self._conv_bn( - input=route, - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not is_train), - name=self.prefix_name + "yolo_transition.{}".format(i)) - # upsample - route = self._upsample(route) - - return outputs, blocks - - def get_prediction(self, outputs, im_size): - """ - Get prediction result of YOLOv3 network - - Args: - outputs (list): list of Variables, return from _get_outputs - im_size (Variable): Variable of size([h, w]) of each image - - Returns: - pred (Variable): The prediction result after non-max suppress. 
- - """ - boxes = [] - scores = [] - downsample = 32 - for i, output in enumerate(outputs): - box, score = fluid.layers.yolo_box( - x=output, - img_size=im_size, - anchors=self.mask_anchors[i], - class_num=self.num_classes, - conf_thresh=self.nms.score_threshold, - downsample_ratio=downsample, - name=self.prefix_name + "yolo_box" + str(i)) - boxes.append(box) - scores.append(fluid.layers.transpose(score, perm=[0, 2, 1])) - - downsample //= 2 - - yolo_boxes = fluid.layers.concat(boxes, axis=1) - yolo_scores = fluid.layers.concat(scores, axis=2) - pred = fluid.layers.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.nms.score_threshold, - nms_top_k=self.nms.nms_top_k, - keep_top_k=self.nms.keep_top_k, - nms_threshold=self.nms.nms_threshold, - background_label=self.nms.background_label, - normalized=self.nms.normalized, - name="multiclass_nms") - return pred From 0a26a1fabfa7f2d0a8239498f1da79e1b47a57ae Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:16:39 +0800 Subject: [PATCH 071/117] update human_pose_estimation_resnet50_mpii (#1961) * update human_pose_estimation_resnet50_mpii * update * add clean func * update save inference model --- .../README.md | 16 +- .../data_feed.py | 1 - .../module.py | 61 +++---- .../pose_resnet.py | 157 ------------------ .../test.py | 83 +++++++++ 5 files changed, 114 insertions(+), 204 deletions(-) delete mode 100644 modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py create mode 100644 modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/test.py diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md index 7bb495224..ad3b7c7c7 100644 --- a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/README.md @@ -89,20 +89,14 @@ - data (OrderedDict): 人体骨骼关键点的坐标。 - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(dirname): ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称 - - model_filename: 模型文件名称,默认为__model__ - - params_filename: 参数文件名称,默认为__params__(仅当combined为True时生效) - - combined: 是否将参数保存到统一的一个文件中 + - dirname: 模型保存路径 ## 四、服务部署 @@ -155,6 +149,10 @@ * 1.1.1 +* 1.2.0 + + 移除 fluid api + * ```shell - $ hub install human_pose_estimation_resnet50_mpii==1.1.1 + $ hub install human_pose_estimation_resnet50_mpii==1.2.0 ``` diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py index a86e90cff..ab9d57a6c 100644 --- a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/data_feed.py @@ -5,7 +5,6 @@ import cv2 import numpy as np -from PIL import Image __all__ = ['reader'] diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py index 43bf5b84e..e7a8e19e6 100644 --- a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/module.py @@ -6,15 +6,15 @@ import os import argparse +import paddle +import paddle.jit +import 
paddle.static import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from human_pose_estimation_resnet50_mpii.processor import base64_to_cv2, postprocess -from human_pose_estimation_resnet50_mpii.data_feed import reader -from human_pose_estimation_resnet50_mpii.pose_resnet import ResNet +from .processor import base64_to_cv2, postprocess +from .data_feed import reader @moduleinfo( @@ -24,20 +24,22 @@ author_email="paddle-dev@baidu.comi", summary= "Paddle implementation for the paper `Simple baselines for human pose estimation and tracking`, trained with the MPII dataset.", - version="1.1.1") -class HumanPoseEstimation(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "pose-resnet50-mpii-384x384") + version="1.2.0") +class HumanPoseEstimation: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "pose-resnet50-mpii-384x384", "model") self._set_config() def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -46,10 +48,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def keypoint_detection(self, images=None, @@ -80,7 +82,6 @@ def keypoint_detection(self, total_num = len(all_data) loop_num = int(np.ceil(total_num / batch_size)) - res = list() for iter_id in range(loop_num): batch_data = list() @@ -92,9 +93,14 @@ def keypoint_detection(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = np.expand_dims(output_handle.copy_to_cpu(), axis=1) # postprocess one by one for i in range(len(batch_data)): out = postprocess( @@ -107,25 +113,6 @@ def keypoint_detection(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, 
target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py deleted file mode 100644 index f5a7638a2..000000000 --- a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/pose_resnet.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid - -__all__ = ["ResNet", "ResNet50", "ResNet101", "ResNet152"] - -BN_MOMENTUM = 0.9 - - -class ResNet(): - def __init__(self, layers=50, kps_num=16, test_mode=False): - """ - :param layers: int, the layers number which is used here - :param kps_num: int, the number of keypoints in accord with the dataset - :param test_mode: bool, if True, only return output heatmaps, no loss - - :return: loss, output heatmaps - """ - self.k = kps_num - self.layers = layers - self.test_mode = test_mode - - def net(self, input, target=None, target_weight=None): - layers = self.layers - supported_layers = [50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - if layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - conv = self.conv_bn_layer(input=input, num_filters=64, filter_size=7, stride=2, act='relu') - conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - conv = self.bottleneck_block( - input=conv, num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1) - - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) - conv = fluid.layers.conv2d_transpose( - input=conv, - num_filters=256, - filter_size=4, - padding=1, - stride=2, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - conv = fluid.layers.batch_norm(input=conv, act='relu', momentum=BN_MOMENTUM) - - out = fluid.layers.conv2d( - input=conv, - num_filters=self.k, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001))) - - if self.test_mode: - return out - else: - loss = self.calc_loss(out, target, target_weight) - return loss, out - - def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1, 
act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Normal(0., 0.001)), - act=None, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act, momentum=BN_MOMENTUM) - - def shortcut(self, input, ch_out, stride): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1: - return self.conv_bn_layer(input, ch_out, 1, stride) - else: - return input - - def calc_loss(self, heatmap, target, target_weight): - _, c, h, w = heatmap.shape - x = fluid.layers.reshape(heatmap, (-1, self.k, h * w)) - y = fluid.layers.reshape(target, (-1, self.k, h * w)) - w = fluid.layers.reshape(target_weight, (-1, self.k)) - - x = fluid.layers.split(x, num_or_sections=self.k, dim=1) - y = fluid.layers.split(y, num_or_sections=self.k, dim=1) - w = fluid.layers.split(w, num_or_sections=self.k, dim=1) - - _list = [] - for idx in range(self.k): - _tmp = fluid.layers.scale(x=x[idx] - y[idx], scale=1.) - _tmp = _tmp * _tmp - _tmp = fluid.layers.reduce_mean(_tmp, dim=2) - _list.append(_tmp * w[idx]) - - _loss = fluid.layers.concat(_list, axis=0) - _loss = fluid.layers.reduce_mean(_loss) - return 0.5 * _loss - - def bottleneck_block(self, input, num_filters, stride): - conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu') - conv2 = self.conv_bn_layer(input=conv1, num_filters=num_filters * 4, filter_size=1, act=None) - - short = self.shortcut(input, num_filters * 4, stride) - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def ResNet152(): - model = ResNet(layers=152) - return model diff --git a/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/test.py b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/test.py new file mode 100644 index 000000000..0878026bf --- /dev/null +++ b/modules/image/keypoint_detection/human_pose_estimation_resnet50_mpii/test.py @@ -0,0 +1,83 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="human_pose_estimation_resnet50_mpii") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('output_pose') + + def test_keypoint_detection1(self): + results = self.module.keypoint_detection( + paths=['tests/test.jpg'] + ) + kps = results[0]['data'] + self.assertIsInstance(kps, dict) + + def test_keypoint_detection2(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')] + ) + kps = results[0]['data'] + self.assertIsInstance(kps, dict) + + def test_keypoint_detection3(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=True + ) + kps = results[0]['data'] + self.assertIsInstance(kps, dict) + + def test_keypoint_detection4(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True + ) + kps = results[0]['data'] + self.assertIsInstance(kps, dict) + + def test_keypoint_detection5(self): + self.assertRaises( + AssertionError, + self.module.keypoint_detection, + paths=['no.jpg'] + ) + + def test_keypoint_detection6(self): + self.assertRaises( + AttributeError, + self.module.keypoint_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 6b42963d62833925ffed1cdb73400e7d528a5353 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:17:18 +0800 Subject: [PATCH 072/117] update hand_pose_localization (#1967) * update hand_pose_localization * add clean func --- .../hand_pose_localization/model.py | 169 ++++++++++++++---- .../hand_pose_localization/module.py | 24 ++- .../hand_pose_localization/readme.md | 4 +- .../hand_pose_localization/test.py | 79 ++++++++ 4 files changed, 228 insertions(+), 48 deletions(-) create mode 100644 modules/image/keypoint_detection/hand_pose_localization/test.py diff --git a/modules/image/keypoint_detection/hand_pose_localization/model.py b/modules/image/keypoint_detection/hand_pose_localization/model.py index ebe429716..e737c99c0 100644 --- a/modules/image/keypoint_detection/hand_pose_localization/model.py +++ b/modules/image/keypoint_detection/hand_pose_localization/model.py @@ -3,74 +3,167 @@ from paddle.inference import create_predictor, Config -__all__ = ['Model'] +__all__ = ['InferenceModel'] -class Model(): +class InferenceModel: # 初始化函数 - def __init__(self, modelpath, use_gpu=False, use_mkldnn=True, combined=True): - # 加载模型预测器 - self.predictor = self.load_model(modelpath, use_gpu, use_mkldnn, combined) + def __init__(self, + modelpath, + use_gpu=False, + gpu_id=0, + use_mkldnn=False, + cpu_threads=1): + ''' + init the inference model + modelpath: inference model path + use_gpu: use gpu or not + use_mkldnn: use mkldnn or not + ''' + # 加载模型配置 + self.config = self.load_config(modelpath, use_gpu, gpu_id, use_mkldnn, cpu_threads) - # 获取模型的输入输出 - self.input_names = self.predictor.get_input_names() - self.output_names = self.predictor.get_output_names() - self.input_handle = self.predictor.get_input_handle(self.input_names[0]) - self.output_handle = self.predictor.get_output_handle(self.output_names[0]) + # 打印函数 + def __repr__(self): + ''' + get the numbers and name of inputs and 
outputs + ''' + return 'input_num: %d\ninput_names: %s\noutput_num: %d\noutput_names: %s' % ( + self.input_num, + str(self.input_names), + self.output_num, + str(self.output_names) + ) - # 模型加载函数 - def load_model(self, modelpath, use_gpu, use_mkldnn, combined): + # 类调用函数 + def __call__(self, *input_datas, batch_size=1): + ''' + call function + ''' + return self.forward(*input_datas, batch_size=batch_size) + + # 模型参数加载函数 + def load_config(self, modelpath, use_gpu, gpu_id, use_mkldnn, cpu_threads): + ''' + load the model config + modelpath: inference model path + use_gpu: use gpu or not + use_mkldnn: use mkldnn or not + ''' # 对运行位置进行配置 if use_gpu: try: int(os.environ.get('CUDA_VISIBLE_DEVICES')) except Exception: print( - 'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.' - ) + '''Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU. Now switch to CPU to continue...''') use_gpu = False - # 加载模型参数 - if combined: - model = os.path.join(modelpath, "__model__") - params = os.path.join(modelpath, "__params__") + if os.path.isdir(modelpath): + if os.path.exists(os.path.join(modelpath, "__params__")): + # __model__ + __params__ + model = os.path.join(modelpath, "__model__") + params = os.path.join(modelpath, "__params__") + config = Config(model, params) + elif os.path.exists(os.path.join(modelpath, "params")): + # model + params + model = os.path.join(modelpath, "model") + params = os.path.join(modelpath, "params") + config = Config(model, params) + elif os.path.exists(os.path.join(modelpath, "__model__")): + # __model__ + others + config = Config(modelpath) + else: + raise Exception( + "Error! Can\'t find the model in: %s. Please check your model path." % os.path.abspath(modelpath)) + elif os.path.exists(modelpath + ".pdmodel"): + # *.pdmodel + *.pdiparams + model = modelpath + ".pdmodel" + params = modelpath + ".pdiparams" config = Config(model, params) + elif isinstance(modelpath, Config): + config = modelpath else: - config = Config(modelpath) + raise Exception( + "Error! Can\'t find the model in: %s. Please check your model path." 
% os.path.abspath(modelpath)) # 设置参数 if use_gpu: - config.enable_use_gpu(100, 0) + config.enable_use_gpu(100, gpu_id) else: config.disable_gpu() + config.set_cpu_math_library_num_threads(cpu_threads) if use_mkldnn: config.enable_mkldnn() + config.disable_glog_info() - config.switch_ir_optim(True) - config.enable_memory_optim() - config.switch_use_feed_fetch_ops(False) - config.switch_specify_input_names(True) - # 通过参数加载模型预测器 - predictor = create_predictor(config) + # 返回配置 + return config - # 返回预测器 - return predictor + # 预测器创建函数 + def eval(self): + ''' + create the model predictor by model config + ''' + # 创建预测器 + self.predictor = create_predictor(self.config) + + # 获取模型的输入输出名称 + self.input_names = self.predictor.get_input_names() + self.output_names = self.predictor.get_output_names() - # 模型预测函数 - def predict(self, input_datas): - outputs = [] + # 获取模型的输入输出节点数量 + self.input_num = len(self.input_names) + self.output_num = len(self.output_names) + + # 获取输入 + self.input_handles = [] + for input_name in self.input_names: + self.input_handles.append( + self.predictor.get_input_handle(input_name)) + + # 获取输出 + self.output_handles = [] + for output_name in self.output_names: + self.output_handles.append( + self.predictor.get_output_handle(output_name)) + + # 前向计算函数 + def forward(self, *input_datas, batch_size=1): + """ + model inference + batch_size: batch size + *input_datas: x1, x2, ..., xn + """ + # 切分输入数据 + datas_num = input_datas[0].shape[0] + split_num = datas_num // batch_size + \ + 1 if datas_num % batch_size != 0 else datas_num // batch_size + input_datas = [np.array_split(input_data, split_num) + for input_data in input_datas] # 遍历输入数据进行预测 - for input_data in input_datas: - inputs = input_data.copy() - self.input_handle.copy_from_cpu(inputs) + outputs = {} + for step in range(split_num): + for i in range(self.input_num): + input_data = input_datas[i][step].copy() + self.input_handles[i].copy_from_cpu(input_data) + self.predictor.run() - output = self.output_handle.copy_to_cpu() - outputs.append(output) + + for i in range(self.output_num): + output = self.output_handles[i].copy_to_cpu() + if i in outputs: + outputs[i].append(output) + else: + outputs[i] = [output] # 预测结果合并 - outputs = np.concatenate(outputs, 0) + for key in outputs.keys(): + outputs[key] = np.concatenate(outputs[key], 0) + + outputs = [v for v in outputs.values()] # 返回预测结果 - return outputs + return tuple(outputs) if len(outputs) > 1 else outputs[0] \ No newline at end of file diff --git a/modules/image/keypoint_detection/hand_pose_localization/module.py b/modules/image/keypoint_detection/hand_pose_localization/module.py index c855319f1..3176283d0 100644 --- a/modules/image/keypoint_detection/hand_pose_localization/module.py +++ b/modules/image/keypoint_detection/hand_pose_localization/module.py @@ -1,11 +1,11 @@ # coding=utf-8 import os -from paddlehub import Module +import numpy as np from paddlehub.module.module import moduleinfo, serving -from hand_pose_localization.model import Model -from hand_pose_localization.processor import base64_to_cv2, Processor +from .model import InferenceModel +from .processor import base64_to_cv2, Processor @moduleinfo( @@ -14,16 +14,18 @@ author="jm12138", # 作者名称 author_email="jm12138@qq.com", # 作者邮箱 summary="hand_pose_localization", # 模型介绍 - version="1.0.2" # 版本号 + version="1.1.0" # 版本号 ) -class Hand_Pose_Localization(Module): +class Hand_Pose_Localization: # 初始化函数 - def __init__(self, name=None, use_gpu=False): + def __init__(self, use_gpu=False): # 设置模型路径 - self.model_path = 
os.path.join(self.directory, "hand_pose_localization") + self.model_path = os.path.join(self.directory, "hand_pose_localization", "model") # 加载模型 - self.model = Model(modelpath=self.model_path, use_gpu=use_gpu, use_mkldnn=False, combined=True) + self.model = InferenceModel(modelpath=self.model_path, use_gpu=use_gpu) + + self.model.eval() # 关键点检测函数 def keypoint_detection(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False): @@ -31,7 +33,11 @@ def keypoint_detection(self, images=None, paths=None, batch_size=1, output_dir=' processor = Processor(images, paths, batch_size, output_dir) # 模型预测 - outputs = self.model.predict(processor.input_datas) + outputs = [] + for input_data in processor.input_datas: + output = self.model(input_data) + outputs.append(output) + outputs = np.concatenate(outputs, 0) # 结果后处理 results = processor.postprocess(outputs, visualization) diff --git a/modules/image/keypoint_detection/hand_pose_localization/readme.md b/modules/image/keypoint_detection/hand_pose_localization/readme.md index 0852a5bc0..1309f8087 100644 --- a/modules/image/keypoint_detection/hand_pose_localization/readme.md +++ b/modules/image/keypoint_detection/hand_pose_localization/readme.md @@ -130,8 +130,10 @@ 适配paddlehub 2.0 +* 1.1.0 + * ```shell - $ hub install hand_pose_localization==1.0.1 + $ hub install hand_pose_localization==1.1.0 ``` diff --git a/modules/image/keypoint_detection/hand_pose_localization/test.py b/modules/image/keypoint_detection/hand_pose_localization/test.py new file mode 100644 index 000000000..fc28e9a92 --- /dev/null +++ b/modules/image/keypoint_detection/hand_pose_localization/test.py @@ -0,0 +1,79 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/8UAUuP97RlY/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYxODQxMzI1&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
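The rewritten `InferenceModel` wrapper above separates configuration (`__init__`), predictor creation (`eval()`) and batched inference (`__call__`), chunking each input with `np.array_split` and concatenating the per-chunk outputs. A usage sketch mirroring what `module.py` now does is shown below; the model path and input shape are placeholders only.

```python
import numpy as np

# Hypothetical preprocessed batch; the real module builds it inside Processor.
fake_batch = np.random.rand(4, 3, 368, 368).astype('float32')

model = InferenceModel(modelpath='hand_pose_localization/model', use_gpu=False)
model.eval()  # creates the predictor and caches the input/output handles
heatmaps = model(fake_batch, batch_size=2)  # runs two chunks of 2 and concatenates the results
print(heatmaps.shape)
```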
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="hand_pose_localization") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('output') + + def test_keypoint_detection1(self): + results = self.module.keypoint_detection( + paths=['tests/test.jpg'], + visualization=False + ) + kps = results[0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection2(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + kps = results[0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection3(self): + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=True + ) + kps = results[0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection4(self): + self.module = hub.Module(name="hand_pose_localization", use_gpu=True) + results = self.module.keypoint_detection( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + kps = results[0] + self.assertIsInstance(kps, list) + + def test_keypoint_detection5(self): + self.assertRaises( + AssertionError, + self.module.keypoint_detection, + paths=['no.jpg'] + ) + + def test_keypoint_detection6(self): + self.assertRaises( + AttributeError, + self.module.keypoint_detection, + images=['test.jpg'] + ) + + +if __name__ == "__main__": + unittest.main() From 16165a742fed5b37aa2a6c7c750b1950c8c29040 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:17:39 +0800 Subject: [PATCH 073/117] update pyramidbox_face_detection (#1975) * update pyramidbox_face_detection * update * add clean func * update save inference model --- .../pyramidbox_face_detection/README.md | 17 ++- .../pyramidbox_face_detection/README_en.md | 17 ++- .../pyramidbox_face_detection/module.py | 60 ++++---- .../pyramidbox_face_detection/processor.py | 3 +- .../pyramidbox_face_detection/test.py | 133 ++++++++++++++++++ 5 files changed, 175 insertions(+), 55 deletions(-) create mode 100644 modules/image/face_detection/pyramidbox_face_detection/test.py diff --git a/modules/image/face_detection/pyramidbox_face_detection/README.md b/modules/image/face_detection/pyramidbox_face_detection/README.md index d7c26e9b2..7a6293727 100644 --- a/modules/image/face_detection/pyramidbox_face_detection/README.md +++ b/modules/image/face_detection/pyramidbox_face_detection/README.md @@ -100,19 +100,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
## 四、服务部署 @@ -165,6 +159,11 @@ * 1.1.0 修复numpy数据读取问题 + +* 1.2.0 + + 修复无法导出推理模型的问题 + - ```shell - $ hub install pyramidbox_face_detection==1.1.0 + $ hub install pyramidbox_face_detection==1.2.0 ``` diff --git a/modules/image/face_detection/pyramidbox_face_detection/README_en.md b/modules/image/face_detection/pyramidbox_face_detection/README_en.md index 5f12c1def..502437e0c 100644 --- a/modules/image/face_detection/pyramidbox_face_detection/README_en.md +++ b/modules/image/face_detection/pyramidbox_face_detection/README_en.md @@ -99,19 +99,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -164,6 +158,11 @@ * 1.1.0 Fix the problem of reading numpy + +* 1.2.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install pyramidbox_face_detection==1.1.0 + $ hub install pyramidbox_face_detection==1.2.0 ``` diff --git a/modules/image/face_detection/pyramidbox_face_detection/module.py b/modules/image/face_detection/pyramidbox_face_detection/module.py index 8b44a11da..89fa16c43 100644 --- a/modules/image/face_detection/pyramidbox_face_detection/module.py +++ b/modules/image/face_detection/pyramidbox_face_detection/module.py @@ -7,13 +7,14 @@ import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from pyramidbox_face_detection.data_feed import reader -from pyramidbox_face_detection.processor import postprocess, base64_to_cv2 +from .data_feed import reader +from .processor import postprocess, base64_to_cv2 @moduleinfo( @@ -22,20 +23,22 @@ author="baidu-vis", author_email="", summary="Baidu's PyramidBox model for face detection.", - version="1.1.0") -class PyramidBoxFaceDetection(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_face_detection_widerface") + version="1.2.0") +class PyramidBoxFaceDetection: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_face_detection_widerface", "model") self._set_config() def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -44,10 +47,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def 
face_detection(self, images=None, @@ -95,11 +98,17 @@ def face_detection(self, # process one by one for element in reader(images, paths): image = np.expand_dims(element['image'], axis=0).astype('float32') - image_tensor = PaddleTensor(image.copy()) - data_out = self.gpu_predictor.run([image_tensor]) if use_gpu else self.cpu_predictor.run([image_tensor]) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = np.expand_dims(output_handle.copy_to_cpu(), axis=1) # print(len(data_out)) # 1 out = postprocess( - data_out=data_out[0].as_ndarray(), + data_out=output_handle.copy_to_cpu(), org_im=element['org_im'], org_im_path=element['org_im_path'], org_im_width=element['org_im_width'], @@ -110,25 +119,6 @@ def face_detection(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/face_detection/pyramidbox_face_detection/processor.py b/modules/image/face_detection/pyramidbox_face_detection/processor.py index 0d27ee57c..3fee41e8f 100644 --- a/modules/image/face_detection/pyramidbox_face_detection/processor.py +++ b/modules/image/face_detection/pyramidbox_face_detection/processor.py @@ -5,12 +5,11 @@ import os import time -from collections import OrderedDict import base64 import cv2 import numpy as np -from PIL import Image, ImageDraw +from PIL import ImageDraw __all__ = ['base64_to_cv2', 'postprocess'] diff --git a/modules/image/face_detection/pyramidbox_face_detection/test.py b/modules/image/face_detection/pyramidbox_face_detection/test.py new file mode 100644 index 000000000..730a31417 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_face_detection/test.py @@ -0,0 +1,133 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="pyramidbox_face_detection") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + cv2.error, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 497279b52a1df55a29123781359c2f27a8dd4e30 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:20:31 +0800 Subject: [PATCH 074/117] update pyramidbox_lite_mobile (#1976) * update pyramidbox_lite_mobile * update * add clean func * update save inference model * update save inference model --- .../pyramidbox_lite_mobile/README.md | 16 +-- .../pyramidbox_lite_mobile/README_en.md | 16 +-- .../pyramidbox_lite_mobile/module.py | 42 ++---- .../pyramidbox_lite_mobile/processor.py | 1 - .../pyramidbox_lite_mobile/test.py | 133 ++++++++++++++++++ 5 files changed, 158 insertions(+), 50 deletions(-) create mode 100644 modules/image/face_detection/pyramidbox_lite_mobile/test.py diff --git 
a/modules/image/face_detection/pyramidbox_lite_mobile/README.md b/modules/image/face_detection/pyramidbox_lite_mobile/README.md index b4fd8b8c8..e4f99608a 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README.md @@ -101,19 +101,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。
+ - dirname: 模型保存路径
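The module.py updates in this patch series replace the legacy `AnalysisConfig`/`PaddleTensor` flow with `paddle.inference` handles. A condensed sketch of that pattern follows; the file names and the dummy input shape are illustrative, the API calls are the ones used in the diffs:

```python
import numpy as np
from paddle.inference import Config, create_predictor

# Illustrative file names; the modules build them from
# default_pretrained_model_path + '.pdmodel' / '.pdiparams'.
config = Config('model.pdmodel', 'model.pdiparams')
config.disable_glog_info()
config.disable_gpu()  # or: config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)
predictor = create_predictor(config)

image = np.zeros((1, 3, 640, 640), dtype='float32')  # dummy input tensor
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(image)
predictor.run()
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
result = output_handle.copy_to_cpu()
```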
## 四、服务部署 @@ -167,6 +161,10 @@ 移除 fluid api +* 1.3.0 + + 修复无法导出推理模型的问题 + - ```shell - $ hub install pyramidbox_lite_mobile==1.2.1 + $ hub install pyramidbox_lite_mobile==1.3.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md index d1439fc22..3c50825ef 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile/README_en.md @@ -100,19 +100,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -166,6 +160,10 @@ Remove fluid api +* 1.3.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install pyramidbox_lite_mobile==1.2.1 + $ hub install pyramidbox_lite_mobile==1.3.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/module.py b/modules/image/face_detection/pyramidbox_lite_mobile/module.py index 2f6e665e9..2a550bb5f 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/module.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile/module.py @@ -10,11 +10,10 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from pyramidbox_lite_mobile.data_feed import reader -from pyramidbox_lite_mobile.processor import base64_to_cv2 -from pyramidbox_lite_mobile.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess -import paddlehub as hub from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -25,11 +24,10 @@ author="baidu-vis", author_email="", summary="PyramidBox-Lite-Mobile is a high-performance face detection model.", - version="1.2.1") -class PyramidBoxLiteMobile(hub.Module): - - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_face_detection") + version="1.3.0") +class PyramidBoxLiteMobile: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_face_detection", "model") self._set_config() self.processor = self @@ -37,7 +35,9 @@ def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -49,7 +49,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -125,26 +125,6 @@ def face_detection(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else 
model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - var = program.global_block().vars['detection_output_0.tmp_1'] - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/processor.py b/modules/image/face_detection/pyramidbox_lite_mobile/processor.py index 5057ab5b1..2045f51c2 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile/processor.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile/processor.py @@ -5,7 +5,6 @@ import os import time -from collections import OrderedDict import base64 import cv2 diff --git a/modules/image/face_detection/pyramidbox_lite_mobile/test.py b/modules/image/face_detection/pyramidbox_lite_mobile/test.py new file mode 100644 index 000000000..1b06f9b7f --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile/test.py @@ -0,0 +1,133 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="pyramidbox_lite_mobile") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + AttributeError, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 2588a324ab29f067c009ee499d0354b57600c169 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:21:00 +0800 Subject: [PATCH 075/117] update pyramidbox_lite_server_mask (#1981) * update pyramidbox_lite_server_mask * update * add clean func * update save inference model --- .../pyramidbox_lite_server_mask/README.md | 17 +-- .../pyramidbox_lite_server_mask/README_en.md | 16 +- .../pyramidbox_lite_server_mask/data_feed.py | 20 +-- .../pyramidbox_lite_server_mask/module.py | 48 ++---- .../pyramidbox_lite_server_mask/processor.py | 1 - .../pyramidbox_lite_server_mask/test.py | 144 ++++++++++++++++++ 6 files changed, 179 insertions(+), 67 deletions(-) create mode 100644 
modules/image/face_detection/pyramidbox_lite_server_mask/test.py diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md index 6f21a6ab7..744bef6cb 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README.md @@ -131,19 +131,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。
+ - dirname: 模型保存路径
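Because pyramidbox_lite_server_mask wraps a face detector plus a mask classifier, the simplified export writes two model pairs, as the test added later in this patch asserts. A sketch with an illustrative output prefix:

```python
import paddlehub as hub

module = hub.Module(name="pyramidbox_lite_server_mask")
module.save_inference_model('./inference/model')
# Per the added test, this produces:
#   ./inference/model/face_detector.pdmodel + .pdiparams  (face detector)
#   ./inference/model/model.pdmodel         + .pdiparams  (mask classifier)
```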
## 四、服务部署 @@ -194,7 +188,6 @@ # 将模型保存在test_program文件夹之中 pyramidbox_lite_server_mask.save_inference_model(dirname="test_program") ``` - 通过以上命令,可以获得人脸检测和口罩佩戴判断模型,分别存储在pyramidbox\_lite和mask\_detector之中。文件夹中的\_\_model\_\_是模型结构文件,\_\_params\_\_文件是权重文件。 - ### 进行模型转换 - 从paddlehub下载的是预测模型,可以使用PaddleLite提供的模型优化工具OPT对预测模型进行转换,转换之后进而可以实现在手机等端侧硬件上的部署,具体请请参考[OPT工具](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html) @@ -212,6 +205,10 @@ 移除 fluid api +* 1.4.0 + + 修复无法导出推理模型的问题 + - ```shell - $ hub install pyramidbox_lite_server_mask==1.3.2 + $ hub install pyramidbox_lite_server_mask==1.4.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md index da5ba9e38..f4d878b4d 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/README_en.md @@ -107,19 +107,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file + - dirname: model save path ## IV.Server Deployment @@ -189,6 +183,10 @@ Remove fluid api +* 1.4.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install pyramidbox_lite_server_mask==1.3.2 + $ hub install pyramidbox_lite_server_mask==1.4.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py b/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py index 7d1316482..5068a2a14 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/data_feed.py @@ -180,17 +180,17 @@ def reader(face_detector, shrink, confs_threshold, images, paths, use_gpu, if _s: scale_res.append(np.array(_s)) if scale_res: - scale_res = np.row_stack(scale_res) - scale_res = bbox_vote(scale_res) - keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] - scale_res = scale_res[keep_index, :] - for data in scale_res: + scale_res = np.row_stack(scale_res) + scale_res = bbox_vote(scale_res) + keep_index = np.where(scale_res[:, 4] >= confs_threshold)[0] + scale_res = scale_res[keep_index, :] + for data in scale_res: face = { - 'left': data[0], - 'top': data[1], - 'right': data[2], - 'bottom': data[3], - 'confidence': data[4] + 'left': data[0], + 'top': data[1], + 'right': data[2], + 'bottom': data[3], + 'confidence': data[4] } detect_faces.append(face) else: diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/module.py b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py index 9184fa6f6..0a7e34d37 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/module.py +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/module.py @@ -10,9 +10,9 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from pyramidbox_lite_server_mask.data_feed import reader -from pyramidbox_lite_server_mask.processor import base64_to_cv2 -from pyramidbox_lite_server_mask.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess import paddlehub as hub from paddlehub.module.module 
import moduleinfo @@ -27,15 +27,14 @@ author_email="", summary= "PyramidBox-Lite-Server-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.2") -class PyramidBoxLiteServerMask(hub.Module): - - def _initialize(self, face_detector_module=None): + version="1.4.0") +class PyramidBoxLiteServerMask: + def __init__(self, face_detector_module=None): """ Args: face_detector_module (class): module to detect face. """ - self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_mask_model") + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_server_mask_model", "model") if face_detector_module is None: self.face_detector = hub.Module(name='pyramidbox_lite_server') else: @@ -47,7 +46,9 @@ def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -59,7 +60,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -179,33 +180,6 @@ def face_detection(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - classifier_dir = os.path.join(dirname, 'mask_detector') - detector_dir = os.path.join(dirname, 'pyramidbox_lite') - self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) - self._save_detector_model(detector_dir, model_filename, params_filename, combined) - - def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): - self.face_detector.save_inference_model(dirname, model_filename, params_filename, combined) - - def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py b/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py index 61c7be6ad..9c2268761 100644 --- a/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/processor.py @@ -5,7 +5,6 @@ import os import time -from collections import OrderedDict import base64 import cv2 diff --git a/modules/image/face_detection/pyramidbox_lite_server_mask/test.py b/modules/image/face_detection/pyramidbox_lite_server_mask/test.py new file mode 100644 index 000000000..1ed3810e2 
--- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_server_mask/test.py @@ -0,0 +1,144 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/iFgRcqHznqg/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8MXx8ZmFjZXxlbnwwfHx8fDE2NjE5ODAyMTc&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="pyramidbox_lite_server_mask") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 2000) + self.assertTrue(0 < right < 2000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 2000) + self.assertTrue(0 < right < 2000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 2000) + self.assertTrue(0 < right < 2000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(0 < left < 2000) + self.assertTrue(0 < right < 2000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + AttributeError, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + 
self.assertTrue(os.path.exists('./inference/model/face_detector.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/face_detector.pdiparams')) + + self.assertTrue(os.path.exists('./inference/model/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 7a54b07f0be8dedc3ca5cd8eb37dbfc029bbceed Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:21:18 +0800 Subject: [PATCH 076/117] update falsr_b (#1988) * update falsr_b * add clean func * update falsr_b --- .../super_resolution/falsr_b/README.md | 29 +++---- .../super_resolution/falsr_b/README_en.md | 30 +++---- .../super_resolution/falsr_b/data_feed.py | 2 +- .../super_resolution/falsr_b/module.py | 70 ++++++--------- .../super_resolution/falsr_b/test.py | 86 +++++++++++++++++++ 5 files changed, 144 insertions(+), 73 deletions(-) create mode 100644 modules/image/Image_editing/super_resolution/falsr_b/test.py diff --git a/modules/image/Image_editing/super_resolution/falsr_b/README.md b/modules/image/Image_editing/super_resolution/falsr_b/README.md index b74a5f894..4eb25789a 100644 --- a/modules/image/Image_editing/super_resolution/falsr_b/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_b/README.md @@ -68,12 +68,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_b_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_b_output") ``` - 预测API,用于图像超分辨率。 @@ -93,21 +92,14 @@ * data (numpy.ndarray): 超分辨后图像。 - ```python - def save_inference_model(self, - dirname='falsr_b_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 @@ -167,4 +159,11 @@ 初始发布 +* 1.1.0 + + 移除 fluid API + + ```shell + $ hub install falsr_b == 1.1.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/falsr_b/README_en.md b/modules/image/Image_editing/super_resolution/falsr_b/README_en.md index 5507b2ac6..1dbea0f77 100644 --- a/modules/image/Image_editing/super_resolution/falsr_b/README_en.md +++ b/modules/image/Image_editing/super_resolution/falsr_b/README_en.md @@ -71,12 +71,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_b_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_b_output") ``` - Prediction API. @@ -95,21 +94,14 @@ * data (numpy.ndarray): Result of super resolution. - ```python - def save_inference_model(self, - dirname='falsr_b_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - Save the model to the specified path. - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. 
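To round out the falsr_b API documented above, a brief usage sketch based on the signature in the README and the added test; the input image path is illustrative:

```python
import cv2
import paddlehub as hub

module = hub.Module(name="falsr_b")
results = module.reconstruct(
    images=[cv2.imread('test.jpg')],  # illustrative input image
    use_gpu=False,
    visualization=True,
    output_dir='falsr_b_output')
sr = results[0]['data']  # numpy.ndarray holding the super-resolved image
```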
@@ -170,4 +162,12 @@ First release +- 1.1.0 + + Remove Fluid API + + + ```shell + $ hub install falsr_b == 1.1.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py b/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py index 8aa6514b0..c64ffa078 100644 --- a/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py +++ b/modules/image/Image_editing/super_resolution/falsr_b/data_feed.py @@ -5,7 +5,7 @@ import cv2 import numpy as np -from PIL import Image + __all__ = ['reader'] diff --git a/modules/image/Image_editing/super_resolution/falsr_b/module.py b/modules/image/Image_editing/super_resolution/falsr_b/module.py index b5db9e5ef..73ee69a7c 100644 --- a/modules/image/Image_editing/super_resolution/falsr_b/module.py +++ b/modules/image/Image_editing/super_resolution/falsr_b/module.py @@ -18,13 +18,14 @@ import argparse import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from falsr_b.data_feed import reader -from falsr_b.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from .data_feed import reader +from .processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir @moduleinfo( @@ -33,21 +34,22 @@ author="paddlepaddle", author_email="", summary="falsr_b is a super resolution model.", - version="1.0.0") -class Falsr_B(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "falsr_b_model") + version="1.1.0") +class Falsr_B: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "falsr_b_model", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = self.default_pretrained_model_path - cpu_config = AnalysisConfig(self.model_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -56,10 +58,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="falsr_b_output"): """ @@ -96,11 +98,18 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals for i in range(total_num): image_y = np.array([all_data[i]['img_y']]) image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) - image_y = PaddleTensor(image_y.copy()) - image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) - output = self.gpu_predictor.run([image_y, image_scale_pbpr]) if use_gpu else self.cpu_predictor.run( - [image_y, image_scale_pbpr]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = 
predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_y.copy()) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(image_scale_pbpr.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = np.expand_dims(output_handle.copy_to_cpu(), axis=1) out = postprocess( data_out=output, org_im=all_data[i]['org_im'], @@ -111,29 +120,6 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals res.append(out) return res - def save_inference_model(self, - dirname='falsr_b_save_model', - model_filename=None, - params_filename=None, - combined=False): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/Image_editing/super_resolution/falsr_b/test.py b/modules/image/Image_editing/super_resolution/falsr_b/test.py new file mode 100644 index 000000000..f64fca235 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_b/test.py @@ -0,0 +1,86 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/1sLIu1XKQrY/download?ixid=MnwxMjA3fDB8MXxhbGx8MTJ8fHx8fHwyfHwxNjYyMzQxNDUx&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="falsr_b") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('falsr_b_output') + + def test_reconstruct1(self): + results = self.module.reconstruct( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct2(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct3(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct4(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct5(self): + self.assertRaises( + AssertionError, + self.module.reconstruct, + paths=['no.jpg'] + ) + + def test_reconstruct6(self): + self.assertRaises( + AttributeError, + self.module.reconstruct, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From e6891a504ab1fba528dfda6880e361173d73f65c Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:21:45 +0800 Subject: [PATCH 077/117] update falsr_c (#1989) * update falsr_c * update version * add clean func * update falsr_c --- .../super_resolution/falsr_c/README.md | 30 +++---- .../super_resolution/falsr_c/README_en.md | 29 +++---- .../super_resolution/falsr_c/data_feed.py | 2 +- .../super_resolution/falsr_c/module.py | 70 ++++++--------- .../super_resolution/falsr_c/processor.py | 1 - .../super_resolution/falsr_c/test.py | 86 +++++++++++++++++++ 6 files changed, 144 insertions(+), 74 deletions(-) create mode 100644 modules/image/Image_editing/super_resolution/falsr_c/test.py diff --git a/modules/image/Image_editing/super_resolution/falsr_c/README.md b/modules/image/Image_editing/super_resolution/falsr_c/README.md index 2e7d35bbe..405b73970 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_c/README.md @@ -68,12 +68,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_c_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") ``` - 预测API,用于图像超分辨率。 @@ -93,21 +92,14 @@ * data (numpy.ndarray): 超分辨后图像。 - ```python - def save_inference_model(self, - dirname='falsr_c_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 @@ -166,3 +158,11 @@ 初始发布 + +* 1.1.0 + + 移除 fluid API + + ```shell + $ hub install falsr_c == 1.1.0 + ``` diff --git 
a/modules/image/Image_editing/super_resolution/falsr_c/README_en.md b/modules/image/Image_editing/super_resolution/falsr_c/README_en.md index 5e651a7ea..c7e1d8a20 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/README_en.md +++ b/modules/image/Image_editing/super_resolution/falsr_c/README_en.md @@ -71,12 +71,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_c_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") ``` - Prediction API. @@ -95,21 +94,14 @@ * data (numpy.ndarray): Result of super resolution. - ```python - def save_inference_model(self, - dirname='falsr_c_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - Save the model to the specified path. - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. @@ -170,4 +162,11 @@ First release +- 1.1.0 + Remove Fluid API + + + ```shell + $ hub install falsr_c == 1.1.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py b/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py index 8aa6514b0..c64ffa078 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py +++ b/modules/image/Image_editing/super_resolution/falsr_c/data_feed.py @@ -5,7 +5,7 @@ import cv2 import numpy as np -from PIL import Image + __all__ = ['reader'] diff --git a/modules/image/Image_editing/super_resolution/falsr_c/module.py b/modules/image/Image_editing/super_resolution/falsr_c/module.py index 8a8f25997..b1d8a8a35 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/module.py +++ b/modules/image/Image_editing/super_resolution/falsr_c/module.py @@ -18,13 +18,14 @@ import argparse import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from falsr_c.data_feed import reader -from falsr_c.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from .data_feed import reader +from .processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir @moduleinfo( @@ -33,21 +34,22 @@ author="paddlepaddle", author_email="", summary="falsr_c is a super resolution model.", - version="1.0.0") -class Falsr_C(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "falsr_c_model") + version="1.1.0") +class Falsr_C: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "falsr_c_model", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = self.default_pretrained_model_path - cpu_config = AnalysisConfig(self.model_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + 
self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -56,10 +58,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="falsr_c_output"): """ @@ -96,11 +98,18 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals for i in range(total_num): image_y = np.array([all_data[i]['img_y']]) image_scale_pbpr = np.array([all_data[i]['img_scale_pbpr']]) - image_y = PaddleTensor(image_y.copy()) - image_scale_pbpr = PaddleTensor(image_scale_pbpr.copy()) - output = self.gpu_predictor.run([image_y, image_scale_pbpr]) if use_gpu else self.cpu_predictor.run( - [image_y, image_scale_pbpr]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_y.copy()) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(image_scale_pbpr.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + output = np.expand_dims(output_handle.copy_to_cpu(), axis=1) out = postprocess( data_out=output, org_im=all_data[i]['org_im'], @@ -111,29 +120,6 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals res.append(out) return res - def save_inference_model(self, - dirname='falsr_c_save_model', - model_filename=None, - params_filename=None, - combined=False): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/Image_editing/super_resolution/falsr_c/processor.py b/modules/image/Image_editing/super_resolution/falsr_c/processor.py index fe451116a..805ada4d6 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/processor.py +++ b/modules/image/Image_editing/super_resolution/falsr_c/processor.py @@ -52,7 +52,6 @@ def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visuali result['data'] = sr else: result['data'] = sr - print("result['data'] shape", result['data'].shape) return result diff --git a/modules/image/Image_editing/super_resolution/falsr_c/test.py b/modules/image/Image_editing/super_resolution/falsr_c/test.py new file mode 100644 index 000000000..ec2ef6734 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/falsr_c/test.py @@ -0,0 +1,86 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub 
+ + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/1sLIu1XKQrY/download?ixid=MnwxMjA3fDB8MXxhbGx8MTJ8fHx8fHwyfHwxNjYyMzQxNDUx&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="falsr_c") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('falsr_c_output') + + def test_reconstruct1(self): + results = self.module.reconstruct( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct2(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct3(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct4(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct5(self): + self.assertRaises( + AssertionError, + self.module.reconstruct, + paths=['no.jpg'] + ) + + def test_reconstruct6(self): + self.assertRaises( + AttributeError, + self.module.reconstruct, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 9d830b932d120ed58e54781bd77ae608856c2eb4 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:22:03 +0800 Subject: [PATCH 078/117] update dcscn (#1990) * update dcscn * add clean func * update dcscn --- .../super_resolution/dcscn/README.md | 31 +++---- .../super_resolution/dcscn/README_en.md | 32 +++---- .../super_resolution/dcscn/data_feed.py | 2 +- .../super_resolution/dcscn/module.py | 72 +++++++--------- .../super_resolution/dcscn/test.py | 86 +++++++++++++++++++ 5 files changed, 149 insertions(+), 74 deletions(-) create mode 100644 modules/image/Image_editing/super_resolution/dcscn/test.py diff --git a/modules/image/Image_editing/super_resolution/dcscn/README.md b/modules/image/Image_editing/super_resolution/dcscn/README.md index 15722b2f2..da6069abe 100644 --- a/modules/image/Image_editing/super_resolution/dcscn/README.md +++ b/modules/image/Image_editing/super_resolution/dcscn/README.md @@ -68,12 +68,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="dcscn_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") ``` - 预测API,用于图像超分辨率。 @@ -93,21 +92,14 @@ * data (numpy.ndarray): 超分辨后图像。 - ```python - def save_inference_model(self, - dirname='dcscn_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * 
model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 @@ -171,3 +163,12 @@ * 1.0.0 初始发布 + + +* 1.1.0 + + 移除 fluid API + + ```shell + $ hub install dcscn == 1.1.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/dcscn/README_en.md b/modules/image/Image_editing/super_resolution/dcscn/README_en.md index 098d03657..427d8e6f0 100644 --- a/modules/image/Image_editing/super_resolution/dcscn/README_en.md +++ b/modules/image/Image_editing/super_resolution/dcscn/README_en.md @@ -70,12 +70,11 @@ - ### 3、API - ```python - def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="dcscn_output") + def reconstruct(images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") ``` - Prediction API. @@ -94,21 +93,14 @@ * data (numpy.ndarray): Result of super resolution. - ```python - def save_inference_model(self, - dirname='dcscn_save_model', - model_filename=None, - params_filename=None, - combined=False) + def save_inference_model(dirname) ``` - Save the model to the specified path. - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. @@ -170,3 +162,13 @@ - 1.0.0 First release + + +- 1.1.0 + + Remove Fluid API + + + ```shell + $ hub install dcscn == 1.1.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/dcscn/data_feed.py b/modules/image/Image_editing/super_resolution/dcscn/data_feed.py index 10eeba2e6..0fad3b1ec 100644 --- a/modules/image/Image_editing/super_resolution/dcscn/data_feed.py +++ b/modules/image/Image_editing/super_resolution/dcscn/data_feed.py @@ -5,7 +5,7 @@ import cv2 import numpy as np -from PIL import Image + __all__ = ['reader'] diff --git a/modules/image/Image_editing/super_resolution/dcscn/module.py b/modules/image/Image_editing/super_resolution/dcscn/module.py index 96b2715bc..8f94e5854 100644 --- a/modules/image/Image_editing/super_resolution/dcscn/module.py +++ b/modules/image/Image_editing/super_resolution/dcscn/module.py @@ -18,13 +18,14 @@ import argparse import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from dcscn.data_feed import reader -from dcscn.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from .data_feed import reader +from .processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir @moduleinfo( @@ -33,21 +34,22 @@ author="paddlepaddle", author_email="", summary="dcscn is a super resolution model.", - version="1.0.0") -class Dcscn(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "dcscn_model") + version="1.1.0") +class Dcscn: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "dcscn_model", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = self.default_pretrained_model_path - cpu_config = AnalysisConfig(self.model_file_path) + model = 
self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -56,10 +58,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + self.gpu_predictor = create_predictor(gpu_config) def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=False, output_dir="dcscn_output"): """ @@ -97,13 +99,20 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals image_x = np.array([all_data[i]['img_x']]) image_x2 = np.array([all_data[i]['img_x2']]) dropout = np.array([0]) - image_x = PaddleTensor(image_x.copy()) - image_x2 = PaddleTensor(image_x2.copy()) - drop_out = PaddleTensor(dropout.copy()) - output = self.gpu_predictor.run([image_x, image_x2]) if use_gpu else self.cpu_predictor.run( - [image_x, image_x2]) - output = np.expand_dims(output[0].as_ndarray(), axis=1) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_x.copy()) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu(image_x2.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + output = np.expand_dims(output_handle.copy_to_cpu(), axis=1) out = postprocess( data_out=output, @@ -115,29 +124,6 @@ def reconstruct(self, images=None, paths=None, use_gpu=False, visualization=Fals res.append(out) return res - def save_inference_model(self, - dirname='dcscn_save_model', - model_filename=None, - params_filename=None, - combined=False): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/Image_editing/super_resolution/dcscn/test.py b/modules/image/Image_editing/super_resolution/dcscn/test.py new file mode 100644 index 000000000..525240f9a --- /dev/null +++ b/modules/image/Image_editing/super_resolution/dcscn/test.py @@ -0,0 +1,86 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/1sLIu1XKQrY/download?ixid=MnwxMjA3fDB8MXxhbGx8MTJ8fHx8fHwyfHwxNjYyMzQxNDUx&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + 
response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="dcscn") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('dcscn_output') + + def test_reconstruct1(self): + results = self.module.reconstruct( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct2(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct3(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct4(self): + results = self.module.reconstruct( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_reconstruct5(self): + self.assertRaises( + AssertionError, + self.module.reconstruct, + paths=['no.jpg'] + ) + + def test_reconstruct6(self): + self.assertRaises( + AttributeError, + self.module.reconstruct, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 0ea0f8e8757c3844a98d74013ae3708836bd6355 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:22:22 +0800 Subject: [PATCH 079/117] update user_guided_colorization (#1994) * update user_guided_colorization * add clean func --- .../user_guided_colorization/README.md | 4 + .../user_guided_colorization/README_en.md | 5 ++ .../user_guided_colorization/module.py | 2 +- .../user_guided_colorization/test.py | 85 +++++++++++++++++++ 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 modules/image/Image_editing/colorization/user_guided_colorization/test.py diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/README.md b/modules/image/Image_editing/colorization/user_guided_colorization/README.md index d5d13144e..d91c4fede 100644 --- a/modules/image/Image_editing/colorization/user_guided_colorization/README.md +++ b/modules/image/Image_editing/colorization/user_guided_colorization/README.md @@ -201,4 +201,8 @@ 初始发布 + - ```shell + $ hub install user_guided_colorization==1.0.0 + ``` + diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md b/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md index 8e17592c8..69a11988c 100644 --- a/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md +++ b/modules/image/Image_editing/colorization/user_guided_colorization/README_en.md @@ -203,3 +203,8 @@ * 1.0.0 First release + + + - ```shell + $ hub install user_guided_colorization==1.0.0 + ``` diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/module.py b/modules/image/Image_editing/colorization/user_guided_colorization/module.py index c74bdf8d7..8b447e892 100644 --- a/modules/image/Image_editing/colorization/user_guided_colorization/module.py +++ 
b/modules/image/Image_editing/colorization/user_guided_colorization/module.py @@ -20,7 +20,7 @@ from paddlehub.module.module import moduleinfo import paddlehub.vision.transforms as T from paddlehub.module.cv_module import ImageColorizeModule -from user_guided_colorization.data_feed import ColorizePreprocess +from .data_feed import ColorizePreprocess @moduleinfo( diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/test.py b/modules/image/Image_editing/colorization/user_guided_colorization/test.py new file mode 100644 index 000000000..990f25ea9 --- /dev/null +++ b/modules/image/Image_editing/colorization/user_guided_colorization/test.py @@ -0,0 +1,85 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/1sLIu1XKQrY/download?ixid=MnwxMjA3fDB8MXxhbGx8MTJ8fHx8fHwyfHwxNjYyMzQxNDUx&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="user_guided_colorization") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('colorization') + + def test_predict1(self): + results = self.module.predict( + images=['tests/test.jpg'], + visualization=False + ) + gray = results[0]['gray'] + hint = results[0]['hint'] + real = results[0]['real'] + fake_reg = results[0]['fake_reg'] + + self.assertIsInstance(gray, np.ndarray) + self.assertIsInstance(hint, np.ndarray) + self.assertIsInstance(real, np.ndarray) + self.assertIsInstance(fake_reg, np.ndarray) + + def test_predict2(self): + results = self.module.predict( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + gray = results[0]['gray'] + hint = results[0]['hint'] + real = results[0]['real'] + fake_reg = results[0]['fake_reg'] + + self.assertIsInstance(gray, np.ndarray) + self.assertIsInstance(hint, np.ndarray) + self.assertIsInstance(real, np.ndarray) + self.assertIsInstance(fake_reg, np.ndarray) + + def test_predict3(self): + results = self.module.predict( + images=[cv2.imread('tests/test.jpg')], + visualization=True + ) + gray = results[0]['gray'] + hint = results[0]['hint'] + real = results[0]['real'] + fake_reg = results[0]['fake_reg'] + + self.assertIsInstance(gray, np.ndarray) + self.assertIsInstance(hint, np.ndarray) + self.assertIsInstance(real, np.ndarray) + self.assertIsInstance(fake_reg, np.ndarray) + + def test_predict4(self): + self.assertRaises( + IndexError, + self.module.predict, + images=['no.jpg'], + visualization=False + ) + +if __name__ == "__main__": + unittest.main() From 5c923528176f6b0cd8d5b5f76b97f48748d94bb8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:22:51 +0800 Subject: [PATCH 080/117] update pyramidbox_lite_mobile_mask (#1997) * update pyramidbox_lite_mobile_mask * update * add clean func * update save inference model --- .../pyramidbox_lite_mobile_mask/README.md | 17 +-- .../pyramidbox_lite_mobile_mask/README_en.md | 17 +-- .../pyramidbox_lite_mobile_mask/module.py | 48 ++---- .../pyramidbox_lite_mobile_mask/processor.py | 1 - .../pyramidbox_lite_mobile_mask/test.py | 144 ++++++++++++++++++ 5 files changed, 169 insertions(+), 58 deletions(-) create mode 100644 
modules/image/face_detection/pyramidbox_lite_mobile_mask/test.py diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md index 458a60e72..1e73457e3 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README.md @@ -131,19 +131,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - - dirname: 存在模型的目录名称;
- - model\_filename: 模型文件名称,默认为\_\_model\_\_;
- - params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
- - combined: 是否将参数保存到统一的一个文件中。 + - dirname: 模型保存路径
## 四、服务部署 @@ -194,7 +188,6 @@ # 将模型保存在test_program文件夹之中 pyramidbox_lite_mobile_mask.save_inference_model(dirname="test_program") ``` - 通过以上命令,可以获得人脸检测和口罩佩戴判断模型,分别存储在pyramidbox\_lite和mask\_detector之中。文件夹中的\_\_model\_\_是模型结构文件,\_\_params\_\_文件是权重文件。 - ### 进行模型转换 - 从paddlehub下载的是预测模型,可以使用PaddleLite提供的模型优化工具OPT对预测模型进行转换,转换之后进而可以实现在手机等端侧硬件上的部署,具体请请参考[OPT工具](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html) @@ -212,6 +205,10 @@ 移除 fluid api +* 1.4.0 + + 修复无法导出模型的问题 + - ```shell - $ hub install pyramidbox_lite_mobile_mask==1.3.1 + $ hub install pyramidbox_lite_mobile_mask==1.4.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md index f7d2ef026..abe053f18 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/README_en.md @@ -107,20 +107,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save model to specific path - **Parameters** - - dirname: output dir for saving model - - model\_filename: filename for saving model - - params\_filename: filename for saving parameters - - combined: whether save parameters into one file - + - dirname: model save path ## IV.Server Deployment @@ -188,6 +181,10 @@ Remove fluid api +* 1.4.0 + + Fix a bug of save_inference_model + - ```shell - $ hub install pyramidbox_lite_mobile_mask==1.3.1 + $ hub install pyramidbox_lite_mobile_mask==1.4.0 ``` diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py index 99a09fdd4..f548ac00b 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/module.py @@ -10,9 +10,9 @@ import paddle from paddle.inference import Config from paddle.inference import create_predictor -from pyramidbox_lite_mobile_mask.data_feed import reader -from pyramidbox_lite_mobile_mask.processor import base64_to_cv2 -from pyramidbox_lite_mobile_mask.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess import paddlehub as hub from paddlehub.module.module import moduleinfo @@ -27,15 +27,14 @@ author_email="", summary= "Pyramidbox-Lite-Mobile-Mask is a high-performance face detection model used to detect whether people wear masks.", - version="1.3.1") -class PyramidBoxLiteMobileMask(hub.Module): - - def _initialize(self, face_detector_module=None): + version="1.4.0") +class PyramidBoxLiteMobileMask: + def __init__(self, face_detector_module=None): """ Args: face_detector_module (class): module to detect face. 
""" - self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_mask_model") + self.default_pretrained_model_path = os.path.join(self.directory, "pyramidbox_lite_mobile_mask_model", "model") if face_detector_module is None: self.face_detector = hub.Module(name='pyramidbox_lite_mobile') else: @@ -47,7 +46,9 @@ def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -59,7 +60,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -180,33 +181,6 @@ def face_detection(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - classifier_dir = os.path.join(dirname, 'mask_detector') - detector_dir = os.path.join(dirname, 'pyramidbox_lite') - self._save_classifier_model(classifier_dir, model_filename, params_filename, combined) - self._save_detector_model(detector_dir, model_filename, params_filename, combined) - - def _save_detector_model(self, dirname, model_filename=None, params_filename=None, combined=True): - self.face_detector.save_inference_model(dirname, model_filename, params_filename, combined) - - def _save_classifier_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py index 4a9173f88..8605749a9 100644 --- a/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/processor.py @@ -5,7 +5,6 @@ import os import time -from collections import OrderedDict import base64 import cv2 diff --git a/modules/image/face_detection/pyramidbox_lite_mobile_mask/test.py b/modules/image/face_detection/pyramidbox_lite_mobile_mask/test.py new file mode 100644 index 000000000..776a2ccf7 --- /dev/null +++ b/modules/image/face_detection/pyramidbox_lite_mobile_mask/test.py @@ -0,0 +1,144 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' 
+ if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="pyramidbox_lite_mobile_mask") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('detection_result') + + def test_face_detection1(self): + results = self.module.face_detection( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection2(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection3(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection4(self): + results = self.module.face_detection( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + bbox = results[0]['data'][0] + + label = bbox['label'] + confidence = bbox['confidence'] + left = bbox['left'] + right = bbox['right'] + top = bbox['top'] + bottom = bbox['bottom'] + + self.assertEqual(label, 'NO MASK') + self.assertTrue(confidence > 0.5) + self.assertTrue(1000 < left < 4000) + self.assertTrue(1000 < right < 4000) + self.assertTrue(0 < top < 2000) + self.assertTrue(0 < bottom < 2000) + + def test_face_detection5(self): + self.assertRaises( + AssertionError, + self.module.face_detection, + paths=['no.jpg'] + ) + + def test_face_detection6(self): + self.assertRaises( + AttributeError, + self.module.face_detection, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model/face_detector.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/face_detector.pdiparams')) + + self.assertTrue(os.path.exists('./inference/model/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 8873a70c51af296f65506810ae3a53da0a1d89ca Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:23:10 +0800 Subject: [PATCH 081/117] update 
humanseg_lite (#2000) * update humanseg_lite * add clean func * update save inference model --- .../humanseg_lite/README.md | 20 ++- .../humanseg_lite/README_en.md | 23 +-- .../humanseg_lite/data_feed.py | 1 - .../humanseg_lite/module.py | 115 +++++++------- .../humanseg_lite/processor.py | 1 - .../humanseg_lite/test.py | 145 ++++++++++++++++++ 6 files changed, 231 insertions(+), 74 deletions(-) create mode 100644 modules/image/semantic_segmentation/humanseg_lite/test.py diff --git a/modules/image/semantic_segmentation/humanseg_lite/README.md b/modules/image/semantic_segmentation/humanseg_lite/README.md index 67472e181..12248ce8a 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/README.md +++ b/modules/image/semantic_segmentation/humanseg_lite/README.md @@ -170,19 +170,13 @@ ```python - def save_inference_model(dirname='humanseg_lite_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 ## 四、服务部署 @@ -240,11 +234,21 @@ * 1.0.0 初始发布 + * 1.1.0 新增视频人像分割接口 新增视频流人像分割接口 + * 1.1.1 修复cudnn为8.0.4显存泄露问题 + +* 1.2.0 + + 移除 Fluid API + + ```shell + $ hub install humanseg_lite == 1.2.0 + ``` diff --git a/modules/image/semantic_segmentation/humanseg_lite/README_en.md b/modules/image/semantic_segmentation/humanseg_lite/README_en.md index e37ba0123..f2b45ae35 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/README_en.md +++ b/modules/image/semantic_segmentation/humanseg_lite/README_en.md @@ -171,10 +171,7 @@ - ```python - def save_inference_model(dirname='humanseg_lite_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` @@ -182,10 +179,7 @@ - **Parameters** - * dirname: Save path. - * model\_filename: model file name,defalt is \_\_model\_\_ - * params\_filename: parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. 
@@ -243,13 +237,22 @@ - 1.0.0 - First release + First release - 1.1.0 Added video portrait segmentation interface Added video stream portrait segmentation interface + * 1.1.1 - Fix memory leakage problem of on cudnn 8.0.4 + Fix memory leakage problem of on cudnn 8.0.4 + +* 1.2.0 + + Remove Fluid API + + ```shell + $ hub install humanseg_lite == 1.2.0 + ``` diff --git a/modules/image/semantic_segmentation/humanseg_lite/data_feed.py b/modules/image/semantic_segmentation/humanseg_lite/data_feed.py index 7f9033975..f7fbb0e21 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/data_feed.py +++ b/modules/image/semantic_segmentation/humanseg_lite/data_feed.py @@ -5,7 +5,6 @@ import cv2 import numpy as np -from PIL import Image __all__ = ['reader', 'preprocess_v'] diff --git a/modules/image/semantic_segmentation/humanseg_lite/module.py b/modules/image/semantic_segmentation/humanseg_lite/module.py index b8ba86858..600d4c289 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/module.py +++ b/modules/image/semantic_segmentation/humanseg_lite/module.py @@ -19,14 +19,15 @@ import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from humanseg_lite.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir -from humanseg_lite.data_feed import reader, preprocess_v -from humanseg_lite.optimal import postprocess_v, threshold_mask +from .processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from .data_feed import reader, preprocess_v +from .optimal import postprocess_v, threshold_mask @moduleinfo( @@ -35,22 +36,22 @@ author="paddlepaddle", author_email="", summary="humanseg_lite is a semantic segmentation model.", - version="1.1.0") -class ShufflenetHumanSeg(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_lite_inference") + version="1.2.0") +class ShufflenetHumanSeg: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_lite_inference", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') - self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') - cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -60,10 +61,14 @@ def _set_config(self): use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + + if paddle.get_cudnn_version() == 8004: + gpu_config.delete_pass('conv_elementwise_add_act_fuse_pass') + gpu_config.delete_pass('conv_elementwise_add2_act_fuse_pass') + self.gpu_predictor = 
create_predictor(gpu_config) def segment(self, images=None, @@ -116,9 +121,16 @@ def segment(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) - output = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + output = output_handle.copy_to_cpu() + output = np.expand_dims(output[:, 1, :, :], axis=1) # postprocess one by one for i in range(len(batch_data)): @@ -156,9 +168,16 @@ def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu height = int(frame_org.shape[1]) disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -227,9 +246,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_lite_ ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -255,9 +281,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_lite_ ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = 
output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -279,32 +312,6 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_lite_ break cap_video.release() - def save_inference_model(self, - dirname='humanseg_lite_model', - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, - model_filename=model_filename, - params_filename=params_filename, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/semantic_segmentation/humanseg_lite/processor.py b/modules/image/semantic_segmentation/humanseg_lite/processor.py index e4911ff4d..9cd53a841 100644 --- a/modules/image/semantic_segmentation/humanseg_lite/processor.py +++ b/modules/image/semantic_segmentation/humanseg_lite/processor.py @@ -50,7 +50,6 @@ def postprocess(data_out, org_im, org_im_shape, org_im_path, output_dir, visuali result['data'] = logit else: result['data'] = logit - print("result['data'] shape", result['data'].shape) return result diff --git a/modules/image/semantic_segmentation/humanseg_lite/test.py b/modules/image/semantic_segmentation/humanseg_lite/test.py new file mode 100644 index 000000000..df4334693 --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_lite/test.py @@ -0,0 +1,145 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/pg_WCHWSdT8/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyNDM2ODI4&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + img = cv2.imread('tests/test.jpg') + video = cv2.VideoWriter('tests/test.avi', fourcc, + 20.0, tuple(img.shape[:2])) + for i in range(40): + video.write(img) + video.release() + cls.module = hub.Module(name="humanseg_lite") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('humanseg_lite_output') + shutil.rmtree('humanseg_lite_video_result') + + def test_segment1(self): + results = self.module.segment( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment2(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment3(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment4(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment5(self): + self.assertRaises( + AssertionError, + self.module.segment, + paths=['no.jpg'] + ) + + def test_segment6(self): + self.assertRaises( + AttributeError, + self.module.segment, + images=['test.jpg'] + ) + + def test_video_stream_segment1(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_stream_segment2(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_segment1(self): + self.module.video_segment( + video_path="tests/test.avi", + use_gpu=False, + save_dir='humanseg_lite_video_result' + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From cf5f31126a7b9b44b68666103a2c04cfb5b4b48a Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 15:23:49 
+0800 Subject: [PATCH 082/117] update humanseg_server (#2002) * update humanseg_server * add clean func * update save inference model --- .../humanseg_server/README.md | 20 ++- .../humanseg_server/README_en.md | 23 +-- .../humanseg_server/data_feed.py | 1 - .../humanseg_server/module.py | 109 +++++++------ .../humanseg_server/test.py | 144 ++++++++++++++++++ 5 files changed, 228 insertions(+), 69 deletions(-) create mode 100644 modules/image/semantic_segmentation/humanseg_server/test.py diff --git a/modules/image/semantic_segmentation/humanseg_server/README.md b/modules/image/semantic_segmentation/humanseg_server/README.md index 35e19365c..621d880e2 100644 --- a/modules/image/semantic_segmentation/humanseg_server/README.md +++ b/modules/image/semantic_segmentation/humanseg_server/README.md @@ -173,19 +173,13 @@ ```python - def save_inference_model(dirname='humanseg_server_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 ## 四、服务部署 @@ -243,11 +237,21 @@ * 1.0.0 初始发布 + * 1.1.0 新增视频人像分割接口 新增视频流人像分割接口 + * 1.1.1 修复cudnn为8.0.4显存泄露问题 + +* 1.2.0 + + 移除 Fluid API + + ```shell + $ hub install humanseg_server == 1.2.0 + ``` diff --git a/modules/image/semantic_segmentation/humanseg_server/README_en.md b/modules/image/semantic_segmentation/humanseg_server/README_en.md index 052b37e2a..303d03a30 100644 --- a/modules/image/semantic_segmentation/humanseg_server/README_en.md +++ b/modules/image/semantic_segmentation/humanseg_server/README_en.md @@ -170,10 +170,7 @@ ```python - def save_inference_model(dirname='humanseg_server_model', - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` @@ -181,10 +178,7 @@ - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. 
@@ -242,7 +236,7 @@ - 1.0.0 - First release + First release - 1.1.0 @@ -252,4 +246,13 @@ * 1.1.1 - Fix memory leakage problem of on cudnn 8.0.4 + Fix memory leakage problem of on cudnn 8.0.4 + +* 1.2.0 + + Remove Fluid API + + ```shell + $ hub install humanseg_server == 1.2.0 + ``` + diff --git a/modules/image/semantic_segmentation/humanseg_server/data_feed.py b/modules/image/semantic_segmentation/humanseg_server/data_feed.py index 85639d02d..f538db0ad 100644 --- a/modules/image/semantic_segmentation/humanseg_server/data_feed.py +++ b/modules/image/semantic_segmentation/humanseg_server/data_feed.py @@ -5,7 +5,6 @@ import cv2 import numpy as np -from PIL import Image __all__ = ['reader', 'preprocess_v'] diff --git a/modules/image/semantic_segmentation/humanseg_server/module.py b/modules/image/semantic_segmentation/humanseg_server/module.py index f266f59ec..76e76a591 100644 --- a/modules/image/semantic_segmentation/humanseg_server/module.py +++ b/modules/image/semantic_segmentation/humanseg_server/module.py @@ -20,9 +20,10 @@ import cv2 import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving from humanseg_server.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir @@ -36,22 +37,22 @@ author="baidu-vis", author_email="", summary="DeepLabv3+ is a semantic segmentation model.", - version="1.1.0") -class DeeplabV3pXception65HumanSeg(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_server_inference") + version="1.2.0") +class DeeplabV3pXception65HumanSeg: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "humanseg_server_inference", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') - self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') - cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) @@ -59,10 +60,14 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + + if paddle.get_cudnn_version() == 8004: + gpu_config.delete_pass('conv_elementwise_add_act_fuse_pass') + gpu_config.delete_pass('conv_elementwise_add2_act_fuse_pass') + self.gpu_predictor = create_predictor(gpu_config) def segment(self, images=None, @@ -114,9 +119,16 @@ def segment(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - output = self.gpu_predictor.run([batch_image]) if use_gpu else 
self.cpu_predictor.run([batch_image]) - output = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + output = output_handle.copy_to_cpu() + output = np.expand_dims(output[:, 1, :, :], axis=1) # postprocess one by one for i in range(len(batch_data)): @@ -154,9 +166,16 @@ def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu height = int(frame_org.shape[1]) disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -173,7 +192,7 @@ def video_stream_segment(self, frame_org, frame_id, prev_gray, prev_cfd, use_gpu img_matting = cv2.resize(optflow_map, (height, width), cv2.INTER_LINEAR) return [img_matting, cur_gray, optflow_map] - def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_server_video'): + def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_server_video_result'): resize_h = 512 resize_w = 512 if not video_path: @@ -201,9 +220,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_serve ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -228,9 +254,16 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_serve ret, frame_org = cap_video.read() if ret: frame = preprocess_v(frame_org, resize_w, resize_h) - image = PaddleTensor(np.array([frame.copy()])) - output = self.gpu_predictor.run([image]) if use_gpu else self.cpu_predictor.run([image]) - score_map = output[1].as_ndarray() + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + 
input_handle.copy_from_cpu(frame.copy()[None, ...]) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[1]) + score_map = output_handle.copy_to_cpu() + frame = np.transpose(frame, axes=[1, 2, 0]) score_map = np.transpose(np.squeeze(score_map, 0), axes=[1, 2, 0]) cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) @@ -252,30 +285,6 @@ def video_segment(self, video_path=None, use_gpu=False, save_dir='humanseg_serve break cap_video.release() - def save_inference_model(self, - dirname='humanseg_server_model', - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, - model_filename=model_filename, - params_filename=params_filename, - executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/semantic_segmentation/humanseg_server/test.py b/modules/image/semantic_segmentation/humanseg_server/test.py new file mode 100644 index 000000000..c6097abae --- /dev/null +++ b/modules/image/semantic_segmentation/humanseg_server/test.py @@ -0,0 +1,144 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/pg_WCHWSdT8/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyNDM2ODI4&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + img = cv2.imread('tests/test.jpg') + video = cv2.VideoWriter('tests/test.avi', fourcc, + 20.0, tuple(img.shape[:2])) + for i in range(40): + video.write(img) + video.release() + cls.module = hub.Module(name="humanseg_server") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('humanseg_server_output') + shutil.rmtree('humanseg_server_video_result') + + def test_segment1(self): + results = self.module.segment( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment2(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment3(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment4(self): + results = self.module.segment( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segment5(self): + self.assertRaises( + AssertionError, + self.module.segment, + paths=['no.jpg'] + ) + + def test_segment6(self): + self.assertRaises( + AttributeError, + self.module.segment, + images=['test.jpg'] + ) + + def test_video_stream_segment1(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=False + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_stream_segment2(self): + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=1, + prev_gray=None, + prev_cfd=None, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + img_matting, cur_gray, optflow_map = self.module.video_stream_segment( + frame_org=cv2.imread('tests/test.jpg'), + frame_id=2, + prev_gray=cur_gray, + prev_cfd=optflow_map, + use_gpu=True + ) + self.assertIsInstance(img_matting, np.ndarray) + self.assertIsInstance(cur_gray, np.ndarray) + self.assertIsInstance(optflow_map, np.ndarray) + + def test_video_segment1(self): + self.module.video_segment( + video_path="tests/test.avi", + use_gpu=False + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From cfd8f7f5d4b750316d3e11d129dcb7bcea87d871 Mon Sep 17 00:00:00 2001 From: DanielYang Date: Fri, 16 Sep 2022 15:52:55 +0800 Subject: [PATCH 083/117] Update README_ch.md --- 
README_ch.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README_ch.md b/README_ch.md index 93018de64..6a66020f4 100644 --- a/README_ch.md +++ b/README_ch.md @@ -34,7 +34,6 @@ - **【超低使用门槛】**:无需深度学习背景、无需数据与训练过程,可快速使用AI模型 - **【一键模型快速预测】**:通过一行命令行或者极简的Python API实现模型调用,可快速体验模型效果 - **【一键模型转服务化】**:一行命令,搭建深度学习模型API服务化部署能力 -- **【十行代码迁移学习】**:十行代码完成图片分类、文本分类的迁移学习任务 - **【跨平台兼容性】**:可运行于Linux、Windows、MacOS等多种操作系统 ## 近期更新 From afba7194fc4b5f31c8a54170ff89e6a18b66519a Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 16:13:41 +0800 Subject: [PATCH 084/117] update lac (#2025) Co-authored-by: wuzewu Co-authored-by: chenjian --- modules/text/lexical_analysis/lac/README.md | 4 +- modules/text/lexical_analysis/lac/module.py | 32 +++--- modules/text/lexical_analysis/lac/network.py | 87 ---------------- .../text/lexical_analysis/lac/processor.py | 1 - modules/text/lexical_analysis/lac/test.py | 99 +++++++++++++++++++ modules/text/lexical_analysis/lac/user.dict | 4 - 6 files changed, 115 insertions(+), 112 deletions(-) delete mode 100755 modules/text/lexical_analysis/lac/network.py create mode 100644 modules/text/lexical_analysis/lac/test.py delete mode 100644 modules/text/lexical_analysis/lac/user.dict diff --git a/modules/text/lexical_analysis/lac/README.md b/modules/text/lexical_analysis/lac/README.md index d94c875e3..2633a8d93 100644 --- a/modules/text/lexical_analysis/lac/README.md +++ b/modules/text/lexical_analysis/lac/README.md @@ -283,10 +283,10 @@ 升级自定义词典功能,支持增加不属于lac默认提供的词性 -* 2.2.1 +* 2.3.0 移除 fluid api - ```shell - $ hub install lac==2.2.1 + $ hub install lac==2.3.0 ``` diff --git a/modules/text/lexical_analysis/lac/module.py b/modules/text/lexical_analysis/lac/module.py index df6993761..2b4428bc0 100644 --- a/modules/text/lexical_analysis/lac/module.py +++ b/modules/text/lexical_analysis/lac/module.py @@ -6,25 +6,20 @@ import argparse import ast import io -import json import math import os import numpy as np -import paddle import six -from lac.custom import Customization -from lac.processor import load_kv_dict -from lac.processor import parse_result -from lac.processor import word_to_ids +from .custom import Customization +from .processor import load_kv_dict +from .processor import parse_result +from .processor import word_to_ids from paddle.inference import Config from paddle.inference import create_predictor -import paddlehub as hub -from paddlehub.common.logger import logger -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.common.utils import sys_stdin_encoding -from paddlehub.io.parser import txt_parser +from paddlehub.utils.utils import sys_stdin_encoding +from paddlehub.utils.parser import txt_parser from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -38,19 +33,18 @@ def __init__(self, *args): @moduleinfo( name="lac", - version="2.2.1", + version="2.3.0", summary= "Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition", author="baidu-nlp", author_email="paddle-dev@baidu.com", type="nlp/lexical_analysis") -class LAC(hub.Module): - - def _initialize(self, user_dict=None): +class LAC: + def __init__(self, user_dict=None): """ initialize with the necessary elements """ - self.pretrained_model_path = os.path.join(self.directory, "infer_model") + self.default_pretrained_model_path = os.path.join(self.directory, "infer_model", "model") self.word2id_dict = 
load_kv_dict(os.path.join(self.directory, "assets/word.dic"), reverse=True, value_func=int) self.id2word_dict = load_kv_dict(os.path.join(self.directory, "assets/word.dic")) self.label2id_dict = load_kv_dict(os.path.join(self.directory, "assets/tag.dic"), reverse=True, value_func=int) @@ -72,7 +66,9 @@ def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -84,7 +80,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) self.gpu_predictor = create_predictor(gpu_config) diff --git a/modules/text/lexical_analysis/lac/network.py b/modules/text/lexical_analysis/lac/network.py deleted file mode 100755 index 47a7de4ad..000000000 --- a/modules/text/lexical_analysis/lac/network.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding:utf-8 -*- -import paddle.fluid as fluid - - -def lex_net(word_dict_len, label_dict_len): - """ - define the lexical analysis network structure - """ - word_emb_dim = 128 - grnn_hidden_dim = 128 - emb_lr = 2 - crf_lr = 0.2 - bigru_num = 2 - init_bound = 0.1 - IS_SPARSE = True - - def _bigru_layer(input_feature): - """ - define the bidirectional gru layer - """ - pre_gru = fluid.layers.fc( - input=input_feature, - size=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) - gru = fluid.layers.dynamic_gru( - input=pre_gru, - size=grnn_hidden_dim, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) - - pre_gru_r = fluid.layers.fc( - input=input_feature, - size=grnn_hidden_dim * 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) - gru_r = fluid.layers.dynamic_gru( - input=pre_gru_r, - size=grnn_hidden_dim, - is_reverse=True, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) - - bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1) - return bi_merge - - def _net_conf(word): - """ - Configure the network - """ - word_embedding = fluid.layers.embedding( - input=word, - size=[word_dict_len, word_emb_dim], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr=fluid.ParamAttr( - learning_rate=emb_lr, - name="word_emb", - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound))) - - input_feature = word_embedding - for i in range(bigru_num): - bigru_output = _bigru_layer(input_feature) - input_feature = bigru_output - - emission = fluid.layers.fc( - size=label_dict_len, - input=bigru_output, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(low=-init_bound, high=init_bound), - regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))) - - size = emission.shape[1] - 
fluid.layers.create_parameter(shape=[size + 2, size], dtype=emission.dtype, name='crfw') - crf_decode = fluid.layers.crf_decoding(input=emission, param_attr=fluid.ParamAttr(name='crfw')) - - return crf_decode, emission - - word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) - - crf_decode, emission = _net_conf(word) - - return crf_decode, word, emission diff --git a/modules/text/lexical_analysis/lac/processor.py b/modules/text/lexical_analysis/lac/processor.py index 270c95dcd..ce9bc5bdb 100644 --- a/modules/text/lexical_analysis/lac/processor.py +++ b/modules/text/lexical_analysis/lac/processor.py @@ -1,6 +1,5 @@ # -*- coding:utf-8 -*- import io -import os import numpy as np import six diff --git a/modules/text/lexical_analysis/lac/test.py b/modules/text/lexical_analysis/lac/test.py new file mode 100644 index 000000000..bb7e5830b --- /dev/null +++ b/modules/text/lexical_analysis/lac/test.py @@ -0,0 +1,99 @@ +import os +import shutil +import unittest + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + cls.text = "今天是个好日子" + cls.texts = ["今天是个好日子", "天气预报说今天要下雨", "下一班地铁马上就要到了"] + cls.module = hub.Module(name="lac") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('inference') + + def test_cut1(self): + results = self.module.cut( + text=self.text, + use_gpu=False, + batch_size=1, + return_tag=False + ) + self.assertEqual(results, ['今天', '是', '个', '好日子']) + + def test_cut2(self): + results = self.module.cut( + text=self.texts, + use_gpu=False, + batch_size=1, + return_tag=False + ) + self.assertEqual(results, [ + {'word': ['今天', '是', '个', '好日子']}, + {'word': ['天气预报', '说', '今天', '要', '下雨']}, + {'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']} + ]) + + def test_cut3(self): + results = self.module.cut( + text=self.texts, + use_gpu=False, + batch_size=2, + return_tag=False + ) + self.assertEqual(results, [ + {'word': ['今天', '是', '个', '好日子']}, + {'word': ['天气预报', '说', '今天', '要', '下雨']}, + {'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']} + ]) + + def test_cut4(self): + results = self.module.cut( + text=self.texts, + use_gpu=True, + batch_size=2, + return_tag=False + ) + self.assertEqual(results, [ + {'word': ['今天', '是', '个', '好日子']}, + {'word': ['天气预报', '说', '今天', '要', '下雨']}, + {'word': ['下', '一班', '地铁', '马上', '就要', '到', '了']} + ]) + + def test_cut5(self): + results = self.module.cut( + text=self.texts, + use_gpu=True, + batch_size=2, + return_tag=True + ) + self.assertEqual(results, [ + { + 'word': ['今天', '是', '个', '好日子'], + 'tag': ['TIME', 'v', 'q', 'n'] + }, + { + 'word': ['天气预报', '说', '今天', '要', '下雨'], + 'tag': ['n', 'v', 'TIME', 'v', 'v'] + }, + { + 'word': ['下', '一班', '地铁', '马上', '就要', '到', '了'], + 'tag': ['f', 'm', 'n', 'd', 'v', 'v', 'xc'] + } + ]) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == '__main__': + unittest.main() diff --git a/modules/text/lexical_analysis/lac/user.dict b/modules/text/lexical_analysis/lac/user.dict deleted file mode 100644 index 47db3a260..000000000 --- a/modules/text/lexical_analysis/lac/user.dict +++ /dev/null @@ -1,4 +0,0 @@ -春天/SEASON -花/n 开/v -秋天的风 -落 阳 \ No newline at end of file From 7fe194c227bdc06a3d995b06155e028054f0a420 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 16 Sep 2022 
20:53:05 +0800 Subject: [PATCH 085/117] Modules README CN to EN (#2027) * CN to EN * CN to EN * fix typo * EN --- docs/docs_en/get_start/linux_quickstart.md | 210 +++++++++++++++++++ docs/docs_en/get_start/mac_quickstart.md | 199 ++++++++++++++++++ docs/docs_en/get_start/windows_quickstart.md | 151 +++++++++++++ modules/README.md | 205 +++++++++--------- 4 files changed, 663 insertions(+), 102 deletions(-) create mode 100755 docs/docs_en/get_start/linux_quickstart.md create mode 100755 docs/docs_en/get_start/mac_quickstart.md create mode 100644 docs/docs_en/get_start/windows_quickstart.md diff --git a/docs/docs_en/get_start/linux_quickstart.md b/docs/docs_en/get_start/linux_quickstart.md new file mode 100755 index 000000000..31b2da5ad --- /dev/null +++ b/docs/docs_en/get_start/linux_quickstart.md @@ -0,0 +1,210 @@ +# Zero base Linux installation and image style transfer + +## Step 1: Install Anaconda + +- Note: To use paddlepaddle, you need to install the Python environment first. Here we choose the Python integrated environment Anaconda toolkit. + - Anaconda is a commonly used python package management program. + - After installing Anaconda, you can install the python environment and the toolkit environment required by numpy. + +- **Download Anaconda**: + + - Download address: https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/?C=M&O=D + - + - Select the version appropriate for your operating system + - You can enter `uname -m` at the terminal to query the instruction set used by the system + + - Download method 1: Download locally, and then transfer the installation package to the Linux server + + - Download method 2: directly use the Linux command line to download + + - ```shell + # Install wget first + sudo apt-get install wget # Ubuntu + sudo yum install wget # CentOS + ``` + + - ```shell + # Then use wget to download from Tsinghua Source + # To download Anaconda3-2021.05-Linux-x86_64.sh, the download command is as follows: + wget https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2021.05-Linux-x86_64.sh + + # If you want to download other versions, you need to change the last/last file name to the version you want to download + ``` + +- To install Anaconda: + + - At the command line, enter `sh Anaconda3-2021.05-Linux-x86_64.sh` + - If you download another version, replace the file name of the command with the file name you downloaded + - Just follow the installation prompts + - When viewing the license, you can enter q to exit + +- **Add conda to the environment variable** + + - The environment variable is added to enable the system to recognize the conda command. If you have added conda to the environment variable path during installation, you can skip this step + + - Open `~/.bashrc` in the terminal: + + - ```shell + # Enter the following command in the terminal: + vim ~/.bashrc + ``` + + - Add conda as an environment variable in `~/.bashrc`: + + - ```shell + # Press i first to enter editing mode + # On the first line, enter: + export PATH="~/anaconda3/bin:$PATH" + # If the installation location is customized during installation, change ~/anaconda3/bin to the bin folder under the customized installation directory + ``` + + - ```shell + # Modified ~/.bash_profile file should be as follows (where xxx is the user name):: + export PATH="~/opt/anaconda3/bin:$PATH" + # >>> conda initialize >>> + # !! Contents within this block are managed by 'conda init' !! + __conda_setup="$('/Users/xxx/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" + if [ $? 
-eq 0 ]; then + eval "$__conda_setup" + else + if [ -f "/Users/xxx/opt/anaconda3/etc/profile.d/conda.sh" ]; then + . "/Users/xxx/opt/anaconda3/etc/profile.d/conda.sh" + else + export PATH="/Users/xxx/opt/anaconda3/bin:$PATH" + fi + fi + unset __conda_setup + # <<< conda initialize <<< + ``` + + - After modification, press the `esc` key to exit editing mode, and then enter `:wq!` And enter to save the exit + + - Verify that the conda command is recognized: + + - Enter `source ~/.bash_profile` in the terminal to update environment variables + - Then enter `conda info --envs` on the terminal. If the current base environment can be displayed, conda has added an environment variable + +## Step 2: Create a conda environment + +- Create a new conda environment + + - ```shell + # On the command line, enter the following command to create a file named paddle_env environment + # This is for accelerated download, use Tsinghua Source + conda create --name paddle_env python=3.8 --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + ``` + + - This command will create an executable environment named paddle_env with Python version 3.8. It will take a while depending on the network status + + - Then the command line will output a prompt message, enter y and press Enter to continue the installation + + - conda_create + +- Activate the newly created conda environment, and enter the following command on the command line: + + - ```shell + # Activate paddle_env environment + conda activate paddle_env + ``` + + - The above anaconda environment and python environment have been installed + +## Step 3: Install the libraries required by the program + +- Use the pip command to install the paddle in the newly activated environment: + + - ```shell + # On the command line, enter the following command: + # The CPU version is installed by default. 
Baidu Source is recommended when installing the paddlepaddle + pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple + ``` + +- After installing the PaddlePaddle, continue to install the paddlehub in the paddle_env environment: + + - ```shell + # On the command line, enter the following command: + pip install paddlehub -i https://mirror.baidu.com/pypi/simple + ``` + + - Introduction document of paddlehub: https://github.com/PaddlePaddle/PaddleHub/blob/develop/README.md + + - When installing the paddlehub, other dependent libraries will be installed automatically, which may take a while + +## Step 4: Install the paddlehub and download the model + +- After installing the paddlehub, download the style migration model: + + - ```shell + # Enter the following command on the command line + hub install stylepro_artistic==1.0.1 + ``` + + - Description document of the model: [https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value=%7B%22scenes%22%3A%5B%22GANs%22%5D%7D](https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value={"scenes"%3A["GANs"]}) + + - hub model intro + +## Step 5: Prepare the style to migrate data and code + +### Prepare style migration data + +- Create Working Directory `style_transfer` under User Directory `~` + + - ```shell + # Enter the following command in the terminal: + cd ~ # Enter the user directory + mkdir style_transfer # Create style_transfer folder + cd style_transfer # Enter style_transfer directory + ``` + +- Place pictures to be converted and style pictures respectively: + + - Place the picture to be converted to `~/style_transfer/pic.jpg` + - pic.jpg + - Place style picture to `~/style_transfer/fangao.jpg` + - fangao.jpg + +### Code + +- Create code file: + + - ```shell + # The following commands are executed on the command line + $ pwd # Check whether the current directory is style_transfer, if not, enter: cd ~/style_transfer + $ touch style_transfer.py # Create an empty file + $ vim style_transfer.py # Open code file with vim editor + # Enter i first to enter editing mode + # Copy the code into the vim editor + # Press esc key to exit editing mode, then enter ": wq" and enter Enter to save and exit + ``` + + - ```python + # Code + import paddlehub as hub + import cv2 + + # Relative address of the picture to be converted + picture = './pic.jpg' + # Relative address of style picture + style_image = './fangao.jpg' + + # Create a style transfer network and load parameters + stylepro_artistic = hub.Module(name="stylepro_artistic") + + # Read in pictures and start style conversion + result = stylepro_artistic.style_transfer( + images=[{'content': cv2.imread(picture), + 'styles': [cv2.imread(style_image)]}], + visualization=True + ) + ``` + +- Running code: + + - On the command line, enter `python style_transfer.py` + - When the program executes, a new folder `transfer_result` will be created, and save the converted file to this directory + - The output pictures are as follows: + - output image + +## Step 6: Explore the pre training model of flying oars +- Congratulations, the installation and introduction cases of PaddleHub in the Linux environment will be completed here. 
Start your more in-depth learning model exploration journey quickly.[【More model exploration, jump to the official website of PaddlePaddle】](https://www.paddlepaddle.org.cn/hublist) + diff --git a/docs/docs_en/get_start/mac_quickstart.md b/docs/docs_en/get_start/mac_quickstart.md new file mode 100755 index 000000000..f6a3dd5ab --- /dev/null +++ b/docs/docs_en/get_start/mac_quickstart.md @@ -0,0 +1,199 @@ +# Zero base mac installation and image style transfer + +## Step 1: Install Anaconda + +- Note: To use paddlepaddle, you need to install the Python environment first. Here we choose the Python integrated environment Anaconda toolkit + - Anaconda is a commonly used python package management program + - After installing Anaconda, you can install the python environment and the toolkit environment required by numpy +- Anaconda Download: + - Link: https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/?C=M&O=D + - anaconda download + - Select the lowest `Anaconda3-2021.05-MacOSX-x86_64.pkg` download +- After downloading, double click the. pkg file to enter the graphical interface + - By default, the installation will take a while +- It is recommended to install a code editor such as vscode or pycharm + +## Step 2: Open the terminal and create a conda environment + +- Open terminal + + - Press the command key and the space bar at the same time, enter "terminal" in the focus search, and double-click to enter the terminal + +- **Add conda to the environment variable** + + - The environment variable is added to enable the system to recognize the conda command + + - Enter the following command to open `~/.bash_profile`: + + - ```shell + vim ~/.bash_profile + ``` + + - In `~/.bash_profile` add conda as an environment variable: + + - ```shell + # Press i first to enter editing mode + # On the first line, enter: + export PATH="~/opt/anaconda3/bin:$PATH" + # If the installation location is customized during installation, change ~/opt/anaconda3/bin to the bin folder under the customized installation directory + ``` + + - ```shell + # Modified ~/.bash_profile file should be as follows (where xxx is the user name): + export PATH="~/opt/anaconda3/bin:$PATH" + # >>> conda initialize >>> + # !! Contents within this block are managed by 'conda init' !! + __conda_setup="$('/Users/xxx/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" + if [ $? -eq 0 ]; then + eval "$__conda_setup" + else + if [ -f "/Users/xxx/opt/anaconda3/etc/profile.d/conda.sh" ]; then + . "/Users/xxx/opt/anaconda3/etc/profile.d/conda.sh" + else + export PATH="/Users/xxx/opt/anaconda3/bin:$PATH" + fi + fi + unset __conda_setup + # <<< conda initialize <<< + ``` + + - After modification, press the `esc` key to exit editing mode, and then enter `:wq!` And enter to save the exit + + - Verify that the conda command is recognized: + + - Enter `source ~/.bash_profile` to update environment variables + - Then enter `conda info --envs` on the terminal. If the current base environment can be displayed, conda has added an environment variable + +- Create a new conda environment + + - ```shell + # On the command line, enter the following command to create a file named paddle_env environment + # This is for accelerated download, use Tsinghua source + conda create --name paddle_env python=3.8 --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + ``` + + - This command will create an executable environment named paddle_env with Python version 3.8. 
It will take a while depending on the network status + + - Then the command line will output a prompt message, enter y and press Enter to continue the installation + + - conda_create + +- Activate the newly created conda environment, and enter the following command on the command line: + + - ```shell + # Activate paddle_env environment + conda activate paddle_env + # View the current python location + where python + ``` + + - conda_actviate + + - The above anaconda environment and python environment have been installed + +## Step 3: Install the libraries required by the program + +- Use the pip command to install the pad in the newly activated environment: + + - ```shell + # Enter the following command on the command line + # Confirm whether the currently used pip is the pip in the paddle_env environment + where pip + # The CPU version is installed by default. Baidu Source is recommended when installing the PaddlePaddle + pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple + ``` + +- After installing Paddle, continue to install the PaddleHub in the paddle_env environment: + + - ```shell + # Enter the following command on the command line + pip install paddlehub -i https://mirror.baidu.com/pypi/simple + ``` + + - Introduction document of paddlehub: https://github.com/PaddlePaddle/PaddleHub/blob/develop/README.md + + - When installing the paddlehub, other dependent libraries will be installed automatically, which may take a while + +## Step 4: Install the paddlehub and download the model + +- After installing the PaddleHub, download the style migration model: + + - ```shell + # Enter the following command on the command line + hub install stylepro_artistic==1.0.1 + ``` + + - Description document of the model: [https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value=%7B%22scenes%22%3A%5B%22GANs%22%5D%7D](https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value={"scenes"%3A["GANs"]}) + + - hub model intro + +## Step 5: Prepare the style to migrate data and code + +### Prepare style migration data + +- Create Working Directory `style_transfer` on Desktop + + - ```shell + # Enter the following command in the terminal: + cd ~/Desktop # Enter the desktop + mkdir style_transfer # Create style_transfer folder + cd style_transfer # Enter style_transfer directory + ``` + +- Place pictures to be converted and style pictures respectively: + + - Place the picture to be converted on the desktop `style_transfer/pic.jpg` + - pic.jpg + - Place Style Picture on Desktop `style_transfer/fangao.jpg` + - fangao.jpg + +### 代码 + +- In `style_transfer` create code file `style_transfer.py` + +- Copy the following code into `style_transfer.py` + + - ```python + import paddlehub as hub + import cv2 + + # Relative address of the picture to be converted + picture = './pic.jpg' + # Relative address of style picture + style_image = './fangao.jpg' + + # Create a style transfer network and load parameters + stylepro_artistic = hub.Module(name="stylepro_artistic") + + # Read in pictures and start style conversion + result = stylepro_artistic.style_transfer( + images=[{'content': cv2.imread(picture), + 'styles': [cv2.imread(style_image)]}], + visualization=True + ) + ``` + +- If there is no code editor such as vscode, you can use the command line method: + + - ```shell + pwd # Check whether the current directory is style_transfer, if not, enter: cd ~/Desktop/style_transfer + touch style_transfer.py # Create an empty file + vim style_transfer.py # Open code file with vim editor + # Enter i 
first to enter editing mode + # Copy the above code into the vim editor + # Press esc key to exit editing mode, then enter ": wq" and enter Enter to save and exit + ``` + +- Running code: + + - On the command line, enter `python style_transfer.py` + - When the program executes, a new folder `transfer_result` will be created, and save the converted file to this directory + - The output pictures are as follows: + - output image + +## Step 6: Explore the pre training model of flying oars +- Congratulations, the installation and introduction cases of PaddleHub in the Mac environment will be completed here. Start your more in-depth learning model exploration journey quickly.[【More model exploration, jump to the official website of PaddlePaddle】](https://www.paddlepaddle.org.cn/hublist) + + + + diff --git a/docs/docs_en/get_start/windows_quickstart.md b/docs/docs_en/get_start/windows_quickstart.md new file mode 100644 index 000000000..9f05636ec --- /dev/null +++ b/docs/docs_en/get_start/windows_quickstart.md @@ -0,0 +1,151 @@ +# Zero base Windows installation and image style transfer + +## Step 1: Install Anaconda + +- Note: To use paddlepaddle, you need to install the Python environment first. Here we choose the Python integrated environment Anaconda toolkit + - Anaconda is a commonly used python package management program + - After installing Anaconda, you can install the python environment and the toolkit environment required by numpy. +- Anaconda Download: + - Link: https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/?C=M&O=D + - Most win10 computers are 64 bit operating systems, choose x86_64 version; If the computer is a 32-bit operating system, select x86.exe + - anaconda download + - After downloading, double click the installer to enter the graphical interface + - The default installation location is Disk C, and it is recommended to change the installation location to Disk D: + - install config + - Check conda to add environment variables, and ignore the warning: + - add conda to path + +## Step 2: Open the terminal and create a conda environment + +- Open Anaconda Prompt terminal + - Windows Start Menu -> Anaconda3 -> Anaconda Prompt + - anaconda download + + +- Create a new conda environment + + - ```shell + # On the command line, enter the following command to create a file named paddle_env Env environment + # This is for accelerated download, use Tsinghua Source + conda create --name paddle_env python=3.8 --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ # a shell command + ``` + + - This command will create an executable environment named paddle_env with Python version 3.8. It will take a while depending on the network status + + - Then the command line will output a prompt message, enter y and press Enter to continue the installation + + - conda create + +- Activate the newly created conda environment, and enter the following command on the command line: + + - ```shell + # Activate paddle_env environment + conda activate paddle_env + # View the current python location + where python + ``` + + - create environment + + - The above anaconda environment and python environment have been installed + +## Step 3: The required libraries for the installer to run + +- Use the pip command to install the PaddlePaddle in the environment you just activated + + - ```shell + # Enter the following command on the command line + # Confirm whether the currently used pip is a pad_ Pip in env environment + where pip + # The CPU version is installed by default. 
Baidu Source is recommended when installing the paddle + pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple + ``` + + - If you need to install the GPU version, please open the [paddle official website](https://www.paddlepaddle.org.cn/) select the appropriate version. + + - Paddle official website: https://www.paddlepaddle.org.cn/ + - Since CUDA and cudnn need to be configured before installing the GPU version, it is recommended to install the GPU version after a certain foundation + +- After installing the Paddle, continue to install the paddlehub in the paddle_env environment: + + - ```shell + # Enter the following command on the command line + pip install paddlehub -i https://mirror.baidu.com/pypi/simple + ``` + + - Introduction document of paddlehub: https://github.com/PaddlePaddle/PaddleHub/blob/develop/README.md + +## Step 4: Install the paddlehub and download the model + +- After installing the paddlehub, download the style migration model: + + - ```shell + # Enter the following command on the command line + hub install stylepro_artistic==1.0.1 + ``` + + - Description document of the model: [https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value=%7B%22scenes%22%3A%5B%22GANs%22%5D%7D](https://www.paddlepaddle.org.cn/hubsearch?filter=en_category&value={"scenes"%3A["GANs"]}) + + - model introduction + +## Step 5: Prepare the style to migrate data and code + +### Prepare style migration data + +- Switch Working Directory to `D:\style_transfer`, enter the following command on the command line + + - ```shell + # Enter the following command on the command line + # Switch the current working directory to the root directory of disk D + D: + # Create style_transfer directory + mkdir style_transfer + # Switch the current directory to style_transfer directory + cd style_transfer + ``` + +- Place pictures to be converted and style pictures respectively + - Place the picture to be converted to `D:\style_transfer\pic.jpg` + - pic.jpg + - Place Style Picture to `D:\style_transfer\fangao.jpg` + - fangao.jpg + +### Code + +- In `D:\style_transfer` create code file `style_transfer.py` + + - If there is no editor such as vscode, you can use Notepad to create a txt file first, and then change the file name to `style_transfer.py` + +- Copy the following code into `style_transfer.py` + + - ```python + import paddlehub as hub + import cv2 + + # The absolute address of the picture to be converted + picture = 'D:\\style_transfer\\pic.jpg' # Note that double backslashes are used in the code + + # Absolute address of the style picture + style_image = 'D:\\style_transfer\\fangao.jpg' + + # Create a style transfer network and load parameters + stylepro_artistic = hub.Module(name="stylepro_artistic") + + # Read in pictures and start style conversion + result = stylepro_artistic.style_transfer( + images=[{'content': cv2.imread(picture), + 'styles': [cv2.imread(style_image)]}], + visualization=True + ) + ``` + +- Running code: + + - On the command line, enter `python style_transfer.py` + - When the program executes, a new folder `transfer_result` will be created, and save the converted file to this directory. + - The output picture is as follows: + - transferred image + +## Step 6: Explore the pre training model of flying oars +- Congratulations, the installation and introduction cases of PaddleHub in the Windows environment will be completed here. 
Start your more in-depth learning model exploration journey quickly.[【More model exploration, jump to the official website of PaddlePaddle】](https://www.paddlepaddle.org.cn/hublist) + diff --git a/modules/README.md b/modules/README.md index a6b0d265a..7b3ea3404 100644 --- a/modules/README.md +++ b/modules/README.md @@ -222,43 +222,44 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction|Huggingface Spaces Demo| |--|--|--|--|--| -|[chinese_ocr_db_crnn_mobile](image/text_recognition/chinese_ocr_db_crnn_mobile)|Differentiable Binarization+RCNN|icdar2015数据集|中文文字识别|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/chinese_ocr_db_crnn_mobile) |[chinese_text_detection_db_mobile](image/text_recognition/chinese_text_detection_db_mobile)|Differentiable Binarization|icdar2015数据集|中文文本检测| -|[chinese_text_detection_db_server](image/text_recognition/chinese_text_detection_db_server)|Differentiable Binarization|icdar2015数据集|中文文本检测| -|[chinese_ocr_db_crnn_server](image/text_recognition/chinese_ocr_db_crnn_server)|Differentiable Binarization+RCNN|icdar2015数据集|中文文字识别|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/chinese_ocr_db_crnn_server) | -|[Vehicle_License_Plate_Recognition](image/text_recognition/Vehicle_License_Plate_Recognition)|-|CCPD|车牌识别| -|[chinese_cht_ocr_db_crnn_mobile](image/text_recognition/chinese_cht_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|繁体中文文字识别| -|[japan_ocr_db_crnn_mobile](image/text_recognition/japan_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|日文文字识别| -|[korean_ocr_db_crnn_mobile](image/text_recognition/korean_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|韩文文字识别| -|[german_ocr_db_crnn_mobile](image/text_recognition/german_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|德文文字识别| -|[french_ocr_db_crnn_mobile](image/text_recognition/french_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|法文文字识别| -|[latin_ocr_db_crnn_mobile](image/text_recognition/latin_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|拉丁文文字识别| -|[cyrillic_ocr_db_crnn_mobile](image/text_recognition/cyrillic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|斯拉夫文文字识别| -|[multi_languages_ocr_db_crnn](image/text_recognition/multi_languages_ocr_db_crnn)|Differentiable Binarization+RCNN|icdar2015数据集|多语言文字识别| -|[kannada_ocr_db_crnn_mobile](image/text_recognition/kannada_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|卡纳达文文字识别| -|[arabic_ocr_db_crnn_mobile](image/text_recognition/arabic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|阿拉伯文文字识别| -|[telugu_ocr_db_crnn_mobile](image/text_recognition/telugu_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰卢固文文字识别| -|[devanagari_ocr_db_crnn_mobile](image/text_recognition/devanagari_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|梵文文字识别| -|[tamil_ocr_db_crnn_mobile](image/text_recognition/tamil_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰米尔文文字识别| +|[chinese_ocr_db_crnn_mobile](image/text_recognition/chinese_ocr_db_crnn_mobile)|Differentiable Binarization+RCNN|icdar2015|Chinese text recognition|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/chinese_ocr_db_crnn_mobile) | 
+|[chinese_text_detection_db_mobile](image/text_recognition/chinese_text_detection_db_mobile)|Differentiable Binarization|icdar2015|Chinese text Detection| +|[chinese_text_detection_db_server](image/text_recognition/chinese_text_detection_db_server)|Differentiable Binarization|icdar2015|Chinese text Detection| +|[chinese_ocr_db_crnn_server](image/text_recognition/chinese_ocr_db_crnn_server)|Differentiable Binarization+RCNN|icdar2015|Chinese text recognition|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/chinese_ocr_db_crnn_server) | +|[Vehicle_License_Plate_Recognition](image/text_recognition/Vehicle_License_Plate_Recognition)|-|CCPD|Vehicle license plate recognition| +|[chinese_cht_ocr_db_crnn_mobile](image/text_recognition/chinese_cht_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Traditional Chinese text recognition| +|[japan_ocr_db_crnn_mobile](image/text_recognition/japan_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Japanese text recognition| +|[korean_ocr_db_crnn_mobile](image/text_recognition/korean_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Korean text recognition| +|[german_ocr_db_crnn_mobile](image/text_recognition/german_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|German text recognition| +|[french_ocr_db_crnn_mobile](image/text_recognition/french_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|French text recognition| +|[latin_ocr_db_crnn_mobile](image/text_recognition/latin_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Latin text recognition| +|[cyrillic_ocr_db_crnn_mobile](image/text_recognition/cyrillic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Cyrillic text recognition| +|[multi_languages_ocr_db_crnn](image/text_recognition/multi_languages_ocr_db_crnn)|Differentiable Binarization+RCNN|icdar2015|Multilingual text recognition| +|[kannada_ocr_db_crnn_mobile](image/text_recognition/kannada_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Kannada text recognition| +|[arabic_ocr_db_crnn_mobile](image/text_recognition/arabic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Arabic text recognition| +|[telugu_ocr_db_crnn_mobile](image/text_recognition/telugu_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Telugu text recognition| +|[devanagari_ocr_db_crnn_mobile](image/text_recognition/devanagari_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Devanagari text recognition| +|[tamil_ocr_db_crnn_mobile](image/text_recognition/tamil_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015|Tamil text recognition| - ### Image Editing |module|Network|Dataset|Introduction|Huggingface Spaces Demo| |--|--|--|--|--| -|[realsr](image/Image_editing/super_resolution/realsr)|LP-KPN|RealSR dataset|图像/视频超分-4倍| -|[deoldify](image/Image_editing/colorization/deoldify)|GAN|ILSVRC 2012|黑白照片/视频着色|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/deoldify) | -|[photo_restoration](image/Image_editing/colorization/photo_restoration)|基于deoldify和realsr模型|-|老照片修复| -|[user_guided_colorization](image/Image_editing/colorization/user_guided_colorization)|siggraph|ILSVRC 2012|图像着色| -|[falsr_c](image/Image_editing/super_resolution/falsr_c)|falsr_c| DIV2k|轻量化超分-2倍| -|[dcscn](image/Image_editing/super_resolution/dcscn)|dcscn| DIV2k|轻量化超分-2倍|
-|[falsr_a](image/Image_editing/super_resolution/falsr_a)|falsr_a| DIV2k|轻量化超分-2倍| -|[falsr_b](image/Image_editing/super_resolution/falsr_b)|falsr_b|DIV2k|轻量化超分-2倍| +|[realsr](image/Image_editing/super_resolution/realsr)|LP-KPN|RealSR dataset|Image / Video super-resolution| +|[deoldify](image/Image_editing/colorization/deoldify)|GAN|ILSVRC 2012|Black-and-white image / video colorization|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/deoldify) | +|[photo_restoration](image/Image_editing/colorization/photo_restoration)|deoldify + realsr|-|Old photo restoration| +|[user_guided_colorization](image/Image_editing/colorization/user_guided_colorization)|siggraph|ILSVRC 2012|User guided colorization| +|[falsr_c](image/Image_editing/super_resolution/falsr_c)|falsr_c| DIV2k|Lightweight super resolution - 2x| +|[dcscn](image/Image_editing/super_resolution/dcscn)|dcscn| DIV2k|Lightweight super resolution - 2x| +|[falsr_a](image/Image_editing/super_resolution/falsr_a)|falsr_a| DIV2k|Lightweight super resolution - 2x| +|[falsr_b](image/Image_editing/super_resolution/falsr_b)|falsr_b|DIV2k|Lightweight super resolution - 2x| - ### Instance Segmentation |module|Network|Dataset|Introduction| |--|--|--|--| -|[solov2](image/instance_segmentation/solov2)|-|COCO2014|实例分割| +|[solov2](image/instance_segmentation/solov2)|-|COCO2014|Instance segmentation| - ### Object Detection @@ -266,16 +267,16 @@ English | [简体中文](README_ch.md) |--|--|--|--| |[faster_rcnn_resnet50_coco2017](image/object_detection/faster_rcnn_resnet50_coco2017)|faster_rcnn|COCO2017|| |[ssd_vgg16_512_coco2017](image/object_detection/ssd_vgg16_512_coco2017)|SSD|COCO2017|| -|[faster_rcnn_resnet50_fpn_venus](image/object_detection/faster_rcnn_resnet50_fpn_venus)|faster_rcnn|百度自建数据集|大规模通用目标检测| +|[faster_rcnn_resnet50_fpn_venus](image/object_detection/faster_rcnn_resnet50_fpn_venus)|faster_rcnn|Baidu self built dataset|Large-scale general detection| |[ssd_vgg16_300_coco2017](image/object_detection/ssd_vgg16_300_coco2017)|||| |[yolov3_resnet34_coco2017](image/object_detection/yolov3_resnet34_coco2017)|YOLOv3|COCO2017|| -|[yolov3_darknet53_pedestrian](image/object_detection/yolov3_darknet53_pedestrian)|YOLOv3|百度自建大规模行人数据集|行人检测| +|[yolov3_darknet53_pedestrian](image/object_detection/yolov3_darknet53_pedestrian)|YOLOv3|Baidu Self built large-scale pedestrian dataset|Pedestrian Detection| |[yolov3_mobilenet_v1_coco2017](image/object_detection/yolov3_mobilenet_v1_coco2017)|YOLOv3|COCO2017|| |[ssd_mobilenet_v1_pascal](image/object_detection/ssd_mobilenet_v1_pascal)|SSD|PASCAL VOC|| |[faster_rcnn_resnet50_fpn_coco2017](image/object_detection/faster_rcnn_resnet50_fpn_coco2017)|faster_rcnn|COCO2017|| |[yolov3_darknet53_coco2017](image/object_detection/yolov3_darknet53_coco2017)|YOLOv3|COCO2017|| -|[yolov3_darknet53_vehicles](image/object_detection/yolov3_darknet53_vehicles)|YOLOv3|百度自建大规模车辆数据集|车辆检测| -|[yolov3_darknet53_venus](image/object_detection/yolov3_darknet53_venus)|YOLOv3|百度自建数据集|大规模通用检测| +|[yolov3_darknet53_vehicles](image/object_detection/yolov3_darknet53_vehicles)|YOLOv3|Baidu Self built large-scale vehicles dataset|vehicles Detection| +|[yolov3_darknet53_venus](image/object_detection/yolov3_darknet53_venus)|YOLOv3|Baidu self built datasetset|Large-scale general detection| |[yolov3_resnet50_vd_coco2017](image/object_detection/yolov3_resnet50_vd_coco2017)|YOLOv3|COCO2017|| - ### Depth Estimation @@ -290,22 +291,22 @@ English | [简体中文](README_ch.md) 
|module|Network|Dataset|Introduction| |--|--|--|--| -|[ernie_gen](text/text_generation/ernie_gen)|ERNIE-GEN|-|面向生成任务的预训练-微调框架| -|[ernie_gen_poetry](text/text_generation/ernie_gen_poetry)|ERNIE-GEN|开源诗歌数据集|诗歌生成| -|[ernie_gen_couplet](text/text_generation/ernie_gen_couplet)|ERNIE-GEN|开源对联数据集|对联生成| -|[ernie_gen_lover_words](text/text_generation/ernie_gen_lover_words)|ERNIE-GEN|网络情诗、情话数据|情话生成| -|[ernie_tiny_couplet](text/text_generation/ernie_tiny_couplet)|Eernie_tiny|开源对联数据集|对联生成| -|[ernie_gen_acrostic_poetry](text/text_generation/ernie_gen_acrostic_poetry)|ERNIE-GEN|开源诗歌数据集|藏头诗生成| -|[Rumor_prediction](text/text_generation/Rumor_prediction)|-|新浪微博中文谣言数据|谣言预测| -|[plato-mini](text/text_generation/plato-mini)|Unified Transformer|十亿级别的中文对话数据|中文对话| -|[plato2_en_large](text/text_generation/plato2_en_large)|plato2|开放域多轮数据集|超大规模生成式对话| -|[plato2_en_base](text/text_generation/plato2_en_base)|plato2|开放域多轮数据集|超大规模生成式对话| -|[CPM_LM](text/text_generation/CPM_LM)|GPT-2|自建数据集|中文文本生成| -|[unified_transformer-12L-cn](text/text_generation/unified_transformer-12L-cn)|Unified Transformer|千万级别中文会话数据|人机多轮对话| -|[unified_transformer-12L-cn-luge](text/text_generation/unified_transformer-12L-cn-luge)|Unified Transformer|千言对话数据集|人机多轮对话| -|[reading_pictures_writing_poems](text/text_generation/reading_pictures_writing_poems)|多网络级联|-|看图写诗| -|[GPT2_CPM_LM](text/text_generation/GPT2_CPM_LM)|||问答类文本生成| -|[GPT2_Base_CN](text/text_generation/GPT2_Base_CN)|||问答类文本生成| +|[ernie_gen](text/text_generation/ernie_gen)|ERNIE-GEN|-|Pre-training finetuning framework for generating tasks| +|[ernie_gen_poetry](text/text_generation/ernie_gen_poetry)|ERNIE-GEN|Open source poetry dataset|Poetry generation| +|[ernie_gen_couplet](text/text_generation/ernie_gen_couplet)|ERNIE-GEN|Open source couplet dataset|Couplet generation| +|[ernie_gen_lover_words](text/text_generation/ernie_gen_lover_words)|ERNIE-GEN|Online love poems and love talk data|Love word generation| +|[ernie_tiny_couplet](text/text_generation/ernie_tiny_couplet)|Eernie_tiny|Open source couplet dataset|Couplet generation| +|[ernie_gen_acrostic_poetry](text/text_generation/ernie_gen_acrostic_poetry)|ERNIE-GEN|Open source poetry dataset|Acrostic poetry Generation| +|[Rumor_prediction](text/text_generation/Rumor_prediction)|-|Sina Weibo Chinese rumor data|Rumor prediction| +|[plato-mini](text/text_generation/plato-mini)|Unified Transformer|Billion level Chinese conversation data|Chinese dialogue| +|[plato2_en_large](text/text_generation/plato2_en_large)|plato2|Open domain multi round dataset|Super large scale generative dialogue| +|[plato2_en_base](text/text_generation/plato2_en_base)|plato2|Open domain multi round dataset|Super large scale generative dialogue| +|[CPM_LM](text/text_generation/CPM_LM)|GPT-2|Self built dataset|Chinese text generation| +|[unified_transformer-12L-cn](text/text_generation/unified_transformer-12L-cn)|Unified Transformer|Ten million level Chinese conversation data|Man machine multi round dialogue| +|[unified_transformer-12L-cn-luge](text/text_generation/unified_transformer-12L-cn-luge)|Unified Transformer|dialogue dataset|Man machine multi round dialogue| +|[reading_pictures_writing_poems](text/text_generation/reading_pictures_writing_poems)|Multi network cascade|-|Look at pictures and write poems| +|[GPT2_CPM_LM](text/text_generation/GPT2_CPM_LM)|||Q&A text generation| +|[GPT2_Base_CN](text/text_generation/GPT2_Base_CN)|||Q&A text generation| - ### Word Embedding @@ -316,7 +317,7 @@ English | [简体中文](README_ch.md) 
|[w2v_weibo_target_word-bigram_dim300](text/embedding/w2v_weibo_target_word-bigram_dim300)|w2v|weibo|| |[w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300)|w2v|baidu_encyclopedia|| |[w2v_literature_target_word-word_dim300](text/embedding/w2v_literature_target_word-word_dim300)|w2v|literature|| -|[word2vec_skipgram](text/embedding/word2vec_skipgram)|skip-gram|百度自建数据集|| +|[word2vec_skipgram](text/embedding/word2vec_skipgram)|skip-gram|Baidu self built dataset|| |[w2v_sogou_target_word-char_dim300](text/embedding/w2v_sogou_target_word-char_dim300)|w2v|sogou|| |[w2v_weibo_target_bigram-char_dim300](text/embedding/w2v_weibo_target_bigram-char_dim300)|w2v|weibo|| |[w2v_zhihu_target_word-bigram_dim300](text/embedding/w2v_zhihu_target_word-bigram_dim300)|w2v|zhihu|| @@ -393,16 +394,16 @@ English | [简体中文](README_ch.md) |--|--|--|--| |[chinese_electra_small](text/language_model/chinese_electra_small)|||| |[chinese_electra_base](text/language_model/chinese_electra_base)|||| -|[roberta-wwm-ext-large](text/language_model/roberta-wwm-ext-large)|roberta-wwm-ext-large|百度自建数据集|| -|[chinese-bert-wwm-ext](text/language_model/chinese_bert_wwm_ext)|chinese-bert-wwm-ext|百度自建数据集|| -|[lda_webpage](text/language_model/lda_webpage)|LDA|百度自建网页领域数据集|| +|[roberta-wwm-ext-large](text/language_model/roberta-wwm-ext-large)|roberta-wwm-ext-large|Baidu self built dataset|| +|[chinese-bert-wwm-ext](text/language_model/chinese_bert_wwm_ext)|chinese-bert-wwm-ext|Baidu self built dataset|| +|[lda_webpage](text/language_model/lda_webpage)|LDA|Baidu Self built Web Page Domain Dataset|| |[lda_novel](text/language_model/lda_novel)|||| |[bert-base-multilingual-uncased](text/language_model/bert-base-multilingual-uncased)|||| |[rbt3](text/language_model/rbt3)|||| -|[ernie_v2_eng_base](text/language_model/ernie_v2_eng_base)|ernie_v2_eng_base|百度自建数据集|| +|[ernie_v2_eng_base](text/language_model/ernie_v2_eng_base)|ernie_v2_eng_base|Baidu self built dataset|| |[bert-base-multilingual-cased](text/language_model/bert-base-multilingual-cased)|||| |[rbtl3](text/language_model/rbtl3)|||| -|[chinese-bert-wwm](text/language_model/chinese_bert_wwm)|chinese-bert-wwm|百度自建数据集|| +|[chinese-bert-wwm](text/language_model/chinese_bert_wwm)|chinese-bert-wwm|Baidu self built dataset|| |[bert-large-uncased](text/language_model/bert-large-uncased)|||| |[slda_novel](text/language_model/slda_novel)|||| |[slda_news](text/language_model/slda_news)|||| @@ -410,16 +411,16 @@ English | [简体中文](README_ch.md) |[slda_webpage](text/language_model/slda_webpage)|||| |[bert-base-cased](text/language_model/bert-base-cased)|||| |[slda_weibo](text/language_model/slda_weibo)|||| -|[roberta-wwm-ext](text/language_model/roberta-wwm-ext)|roberta-wwm-ext|百度自建数据集|| +|[roberta-wwm-ext](text/language_model/roberta-wwm-ext)|roberta-wwm-ext|Baidu self built dataset|| |[bert-base-uncased](text/language_model/bert-base-uncased)|||| |[electra_large](text/language_model/electra_large)|||| -|[ernie](text/language_model/ernie)|ernie-1.0|百度自建数据集|| -|[simnet_bow](text/language_model/simnet_bow)|BOW|百度自建数据集|| -|[ernie_tiny](text/language_model/ernie_tiny)|ernie_tiny|百度自建数据集|| -|[bert-base-chinese](text/language_model/bert-base-chinese)|bert-base-chinese|百度自建数据集|| -|[lda_news](text/language_model/lda_news)|LDA|百度自建新闻领域数据集|| +|[ernie](text/language_model/ernie)|ernie-1.0|Baidu self built dataset|| +|[simnet_bow](text/language_model/simnet_bow)|BOW|Baidu self built dataset|| 
+|[ernie_tiny](text/language_model/ernie_tiny)|ernie_tiny|Baidu self built dataset|| +|[bert-base-chinese](text/language_model/bert-base-chinese)|bert-base-chinese|Baidu self built dataset|| +|[lda_news](text/language_model/lda_news)|LDA|Baidu self built news domain dataset|| |[electra_base](text/language_model/electra_base)|||| -|[ernie_v2_eng_large](text/language_model/ernie_v2_eng_large)|ernie_v2_eng_large|百度自建数据集|| +|[ernie_v2_eng_large](text/language_model/ernie_v2_eng_large)|ernie_v2_eng_large|Baidu self built dataset|| |[bert-large-cased](text/language_model/bert-large-cased)||||
@@ -429,51 +430,51 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction|Huggingface Spaces Demo| |--|--|--|--|--| -|[ernie_skep_sentiment_analysis](text/sentiment_analysis/ernie_skep_sentiment_analysis)|SKEP|百度自建数据集|句子级情感分析| -|[emotion_detection_textcnn](text/sentiment_analysis/emotion_detection_textcnn)|TextCNN|百度自建数据集|对话情绪识别| -|[senta_bilstm](text/sentiment_analysis/senta_bilstm)|BiLSTM|百度自建数据集|中文情感倾向分析| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/senta_bilstm) -|[senta_bow](text/sentiment_analysis/senta_bow)|BOW|百度自建数据集|中文情感倾向分析| -|[senta_gru](text/sentiment_analysis/senta_gru)|GRU|百度自建数据集|中文情感倾向分析| -|[senta_lstm](text/sentiment_analysis/senta_lstm)|LSTM|百度自建数据集|中文情感倾向分析| -|[senta_cnn](text/sentiment_analysis/senta_cnn)|CNN|百度自建数据集|中文情感倾向分析| +|[ernie_skep_sentiment_analysis](text/sentiment_analysis/ernie_skep_sentiment_analysis)|SKEP|Baidu self built dataset|Sentence level sentiment analysis| +|[emotion_detection_textcnn](text/sentiment_analysis/emotion_detection_textcnn)|TextCNN|Baidu self built dataset|Dialogue emotion detection| +|[senta_bilstm](text/sentiment_analysis/senta_bilstm)|BiLSTM|Baidu self built dataset|Chinesesentiment analysis| [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/senta_bilstm) +|[senta_bow](text/sentiment_analysis/senta_bow)|BOW|Baidu self built dataset|Chinese sentiment analysis| +|[senta_gru](text/sentiment_analysis/senta_gru)|GRU|Baidu self built dataset|Chinese sentiment analysis| +|[senta_lstm](text/sentiment_analysis/senta_lstm)|LSTM|Baidu self built dataset|Chinese sentiment analysis| +|[senta_cnn](text/sentiment_analysis/senta_cnn)|CNN|Baidu self built dataset|Chinese sentiment analysis| - ### Syntactic Analysis |module|Network|Dataset|Introduction| |--|--|--|--| -|[DDParser](text/syntactic_analysis/DDParser)|Deep Biaffine Attention|搜索query、网页文本、语音输入等数据|句法分析| +|[DDParser](text/syntactic_analysis/DDParser)|Deep Biaffine Attention|Search query, web text, voice input and other data|Syntactic analysis| - ### Simultaneous Translation |module|Network|Dataset|Introduction| |--|--|--|--| -|[transformer_nist_wait_1](text/simultaneous_translation/stacl/transformer_nist_wait_1)|transformer|NIST 2008-中英翻译数据集|中译英-wait-1策略| -|[transformer_nist_wait_3](text/simultaneous_translation/stacl/transformer_nist_wait_3)|transformer|NIST 2008-中英翻译数据集|中译英-wait-3策略| -|[transformer_nist_wait_5](text/simultaneous_translation/stacl/transformer_nist_wait_5)|transformer|NIST 2008-中英翻译数据集|中译英-wait-5策略| -|[transformer_nist_wait_7](text/simultaneous_translation/stacl/transformer_nist_wait_7)|transformer|NIST 2008-中英翻译数据集|中译英-wait-7策略| -|[transformer_nist_wait_all](text/simultaneous_translation/stacl/transformer_nist_wait_all)|transformer|NIST 2008-中英翻译数据集|中译英-waitk=-1策略| +|[transformer_nist_wait_1](text/simultaneous_translation/stacl/transformer_nist_wait_1)|transformer|NIST 2008|Chinese to English - wait-1| +|[transformer_nist_wait_3](text/simultaneous_translation/stacl/transformer_nist_wait_3)|transformer|NIST 2008|Chinese to English - wait-3| +|[transformer_nist_wait_5](text/simultaneous_translation/stacl/transformer_nist_wait_5)|transformer|NIST 2008|Chinese to English - wait-5| +|[transformer_nist_wait_7](text/simultaneous_translation/stacl/transformer_nist_wait_7)|transformer|NIST 2008|Chinese to English - wait-7| 
+|[transformer_nist_wait_all](text/simultaneous_translation/stacl/transformer_nist_wait_all)|transformer|NIST 2008|Chinese to English - waitk=-1| - ### Lexical Analysis |module|Network|Dataset|Introduction|Huggingface Spaces Demo| |--|--|--|--|--| -|[jieba_paddle](text/lexical_analysis/jieba_paddle)|BiGRU+CRF|百度自建数据集|jieba使用Paddle搭建的切词网络(双向GRU)。同时支持jieba的传统切词方法,如精确模式、全模式、搜索引擎模式等切词模式。| -|[lac](text/lexical_analysis/lac)|BiGRU+CRF|百度自建数据集|百度自研联合的词法分析模型,能整体性地完成中文分词、词性标注、专名识别任务。在百度自建数据集上评测,LAC效果:Precision=88.0%,Recall=88.7%,F1-Score=88.4%。|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/lac) +|[jieba_paddle](text/lexical_analysis/jieba_paddle)|BiGRU+CRF|Baidu self built dataset|Jieba uses Paddle to build a word segmentation network (two-way GRU). At the same time, it supports traditional word segmentation methods of jieba, such as precise mode, full mode, search engine mode, etc.| +|[lac](text/lexical_analysis/lac)|BiGRU+CRF|Baidu self built dataset|The lexical analysis model jointly developed by Baidu can complete the tasks of Chinese word segmentation, part of speech tagging and proper name recognition as a whole. Evaluated on Baidu self built dataset, LAC effect: Precision=88.0%, Recall=88.7%, F1 Score=88.4%.|[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/PaddlePaddle/lac) - ### Punctuation Restoration |module|Network|Dataset|Introduction| |--|--|--|--| -|[auto_punc](text/punctuation_restoration/auto_punc)|Ernie-1.0|WuDaoCorpora 2.0|自动添加7种标点符号| +|[auto_punc](text/punctuation_restoration/auto_punc)|Ernie-1.0|WuDaoCorpora 2.0|Automatically add 7 punctuation marks| - ### Text Review |module|Network|Dataset|Introduction| |--|--|--|--| -|[porn_detection_cnn](text/text_review/porn_detection_cnn)|CNN|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| -|[porn_detection_gru](text/text_review/porn_detection_gru)|GRU|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| -|[porn_detection_lstm](text/text_review/porn_detection_lstm)|LSTM|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| +|[porn_detection_cnn](text/text_review/porn_detection_cnn)|CNN|Baidu self built dataset|Pornography detection, automatically identify whether the text is pornographic and give the corresponding confidence, and identify pornographic descriptions, vulgar friends, and dirty documents in the text| +|[porn_detection_gru](text/text_review/porn_detection_gru)|GRU|Baidu self built dataset|Pornography detection, automatically identify whether the text is pornographic and give the corresponding confidence, and identify pornographic descriptions, vulgar friends, and dirty documents in the text| +|[porn_detection_lstm](text/text_review/porn_detection_lstm)|LSTM|Baidu self built dataset|Pornography detection, automatically identify whether the text is pornographic and give the corresponding confidence, and identify pornographic descriptions, vulgar friends, and dirty documents in the text| ## Audio @@ -481,62 +482,62 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| |--|--|--|--| -|[ge2e_fastspeech2_pwgan](audio/voice_cloning/ge2e_fastspeech2_pwgan)|FastSpeech2|AISHELL-3|中文语音克隆| -|[lstm_tacotron2](audio/voice_cloning/lstm_tacotron2)|LSTM、Tacotron2、WaveFlow|AISHELL-3|中文语音克隆| +|[ge2e_fastspeech2_pwgan](audio/voice_cloning/ge2e_fastspeech2_pwgan)|FastSpeech2|AISHELL-3|Chinese speech cloning| 
+|[lstm_tacotron2](audio/voice_cloning/lstm_tacotron2)|LSTM, Tacotron2, WaveFlow|AISHELL-3|Chinese speech cloning| - ### Text to Speech |module|Network|Dataset|Introduction| |--|--|--|--| -|[transformer_tts_ljspeech](audio/tts/transformer_tts_ljspeech)|Transformer|LJSpeech-1.1|英文语音合成| -|[fastspeech_ljspeech](audio/tts/fastspeech_ljspeech)|FastSpeech|LJSpeech-1.1|英文语音合成| -|[fastspeech2_baker](audio/tts/fastspeech2_baker)|FastSpeech2|Chinese Standard Mandarin Speech Copus|中文语音合成| -|[fastspeech2_ljspeech](audio/tts/fastspeech2_ljspeech)|FastSpeech2|LJSpeech-1.1|英文语音合成| -|[deepvoice3_ljspeech](audio/tts/deepvoice3_ljspeech)|DeepVoice3|LJSpeech-1.1|英文语音合成| +|[transformer_tts_ljspeech](audio/tts/transformer_tts_ljspeech)|Transformer|LJSpeech-1.1|English speech synthesis| +|[fastspeech_ljspeech](audio/tts/fastspeech_ljspeech)|FastSpeech|LJSpeech-1.1|English speech synthesis| +|[fastspeech2_baker](audio/tts/fastspeech2_baker)|FastSpeech2|Chinese Standard Mandarin Speech Corpus|Chinese speech synthesis| +|[fastspeech2_ljspeech](audio/tts/fastspeech2_ljspeech)|FastSpeech2|LJSpeech-1.1|English speech synthesis| +|[deepvoice3_ljspeech](audio/tts/deepvoice3_ljspeech)|DeepVoice3|LJSpeech-1.1|English speech synthesis| - ### Automatic Speech Recognition |module|Network|Dataset|Introduction| |--|--|--|--| -|[deepspeech2_aishell](audio/asr/deepspeech2_aishell)|DeepSpeech2|AISHELL-1|中文语音识别| -|[deepspeech2_librispeech](audio/asr/deepspeech2_librispeech)|DeepSpeech2|LibriSpeech|英文语音识别| -|[u2_conformer_aishell](audio/asr/u2_conformer_aishell)|Conformer|AISHELL-1|中文语音识别| -|[u2_conformer_wenetspeech](audio/asr/u2_conformer_wenetspeech)|Conformer|WenetSpeech|中文语音识别| -|[u2_conformer_librispeech](audio/asr/u2_conformer_librispeech)|Conformer|LibriSpeech|英文语音识别| +|[deepspeech2_aishell](audio/asr/deepspeech2_aishell)|DeepSpeech2|AISHELL-1|Chinese Speech Recognition| +|[deepspeech2_librispeech](audio/asr/deepspeech2_librispeech)|DeepSpeech2|LibriSpeech|English Speech Recognition| +|[u2_conformer_aishell](audio/asr/u2_conformer_aishell)|Conformer|AISHELL-1|Chinese Speech Recognition| +|[u2_conformer_wenetspeech](audio/asr/u2_conformer_wenetspeech)|Conformer|WenetSpeech|Chinese Speech Recognition| +|[u2_conformer_librispeech](audio/asr/u2_conformer_librispeech)|Conformer|LibriSpeech|English Speech Recognition| - ### Audio Classification |module|Network|Dataset|Introduction| |--|--|--|--| -|[panns_cnn6](audio/audio_classification/PANNs/cnn6)|PANNs|Google Audioset|主要包含4个卷积层和2个全连接层,模型参数为4.5M。经过预训练后,可以用于提取音频的embbedding,维度是512| -|[panns_cnn14](audio/audio_classification/PANNs/cnn14)|PANNs|Google Audioset|主要包含12个卷积层和2个全连接层,模型参数为79.6M。经过预训练后,可以用于提取音频的embbedding,维度是2048| -|[panns_cnn10](audio/audio_classification/PANNs/cnn10)|PANNs|Google Audioset|主要包含8个卷积层和2个全连接层,模型参数为4.9M。经过预训练后,可以用于提取音频的embbedding,维度是512| +|[panns_cnn6](audio/audio_classification/PANNs/cnn6)|PANNs|Google Audioset|It mainly includes 4 convolution layers and 2 full connection layers, and the model parameter is 4.5M. After pre-training, it can be used to extract the embbedding of audio. The dimension is 512| +|[panns_cnn14](audio/audio_classification/PANNs/cnn14)|PANNs|Google Audioset|It mainly includes 12 convolution layers and 2 full connection layers, and the model parameter is 79.6M. After pre-training, it can be used to extract the embbedding of audio. The dimension is 2048| +|[panns_cnn10](audio/audio_classification/PANNs/cnn10)|PANNs|Google Audioset|It mainly includes 8 convolution layers and 2 full connection layers, and the model parameter is 4.9M. 
After pre-training, it can be used to extract the embbedding of audio. The dimension is 512| ## Video - ### Video Classification |module|Network|Dataset|Introduction| |--|--|--|--| -|[videotag_tsn_lstm](video/classification/videotag_tsn_lstm)|TSN + AttentionLSTM|百度自建数据集|大规模短视频分类打标签| -|[tsn_kinetics400](video/classification/tsn_kinetics400)|TSN|Kinetics-400|视频分类| -|[tsm_kinetics400](video/classification/tsm_kinetics400)|TSM|Kinetics-400|视频分类| -|[stnet_kinetics400](video/classification/stnet_kinetics400)|StNet|Kinetics-400|视频分类| -|[nonlocal_kinetics400](video/classification/nonlocal_kinetics400)|Non-local|Kinetics-400|视频分类| +|[videotag_tsn_lstm](video/classification/videotag_tsn_lstm)|TSN + AttentionLSTM|Baidu self built dataset|Short-video classification| +|[tsn_kinetics400](video/classification/tsn_kinetics400)|TSN|Kinetics-400|Video classification| +|[tsm_kinetics400](video/classification/tsm_kinetics400)|TSM|Kinetics-400|Video classification| +|[stnet_kinetics400](video/classification/stnet_kinetics400)|StNet|Kinetics-400|Video classification| +|[nonlocal_kinetics400](video/classification/nonlocal_kinetics400)|Non-local|Kinetics-400|Video classification| - ### Video Editing |module|Network|Dataset|Introduction| |--|--|--|--| -|[SkyAR](video/Video_editing/SkyAR)|UNet|UNet|视频换天| +|[SkyAR](video/Video_editing/SkyAR)|UNet|UNet|Video sky Replacement| - ### Multiple Object tracking |module|Network|Dataset|Introduction| |--|--|--|--| -|[fairmot_dla34](video/multiple_object_tracking/fairmot_dla34)|CenterNet|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|实时多目标跟踪| -|[jde_darknet53](video/multiple_object_tracking/jde_darknet53)|YOLOv3|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|多目标跟踪-兼顾精度和速度| +|[fairmot_dla34](video/multiple_object_tracking/fairmot_dla34)|CenterNet|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|Realtime multiple object tracking| +|[jde_darknet53](video/multiple_object_tracking/jde_darknet53)|YOLOv3|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|object tracking with both accuracy and speed| ## Industrial Application @@ -544,4 +545,4 @@ English | [简体中文](README_ch.md) |module|Network|Dataset|Introduction| |--|--|--|--| -|[WatermeterSegmentation](image/semantic_segmentation/WatermeterSegmentation)|DeepLabV3|水表的数字表盘分割数据集|水表的数字表盘分割| +|[WatermeterSegmentation](image/semantic_segmentation/WatermeterSegmentation)|DeepLabV3|Water meter dataset|Water meter segmentation| From aa949a69c1d824e1e63b6278d79e6ed3fcb77dc8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 16 Sep 2022 21:17:00 +0800 Subject: [PATCH 086/117] add english readme for text to image modules (#2023) * add english readme for text to image modules * fix docs --- .../disco_diffusion_clip_rn101/README_en.md | 153 +++++++++++++++++ .../disco_diffusion_clip_rn50/README_en.md | 152 +++++++++++++++++ .../disco_diffusion_clip_vitb32/README_en.md | 153 +++++++++++++++++ .../README_en.md | 153 +++++++++++++++++ .../README_en.md | 154 +++++++++++++++++ .../stable_diffusion/README_en.md | 157 ++++++++++++++++++ 6 files changed, 922 insertions(+) create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn101/README_en.md create mode 100644 modules/image/text_to_image/disco_diffusion_clip_rn50/README_en.md create mode 100644 modules/image/text_to_image/disco_diffusion_clip_vitb32/README_en.md create mode 100644 modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README_en.md create mode 100644 modules/image/text_to_image/disco_diffusion_ernievil_base/README_en.md create mode 100644 
modules/image/text_to_image/stable_diffusion/README_en.md diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/README_en.md b/modules/image/text_to_image/disco_diffusion_clip_rn101/README_en.md new file mode 100644 index 000000000..d68c5aa56 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn101/README_en.md @@ -0,0 +1,153 @@ +# disco_diffusion_clip_rn101 + +|Module Name|disco_diffusion_clip_rn101| +| :--- | :---: | +|Category|text to image| +|Network|dd+clip ResNet101| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|2.9GB| +|Latest update date|2022-08-02| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - Output image +

+ +
+ + - Generating process +

+ +
+ +### Module Introduction + +disco_diffusion_clip_rn101 is a text-to-image generation model that can generate images that match the semantics of the sentence you prompt. The model consists of two parts, one is the diffusion model, which is a generative model that reconstructs the original image from the noisy input. The other part is the multimodal pre-training model (CLIP), which can represent text and images in the same feature space, and text and images with similar semantics will be closer in this feature space. In the text image generation model, the diffusion model is responsible for generating the target image from the initial noise or the specified initial image, and CLIP is responsible for guiding the generated image to be as close as possible to the semantics of the input text. Diffusion model under the guidance of CLIP iteratively generates new images, eventually generating images of what the text describes. The CLIP model used in this module is ResNet101. + +For more details, please refer to [Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) and [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install disco_diffusion_clip_rn101 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run disco_diffusion_clip_rn101 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_rn101_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_rn101") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # Output images will be saved in disco_diffusion_clip_rn101_out directory. + # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_rn101_out/') + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_rn101_out-result.png') + # Show all immediate results + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_rn101_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt.. 
+ + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt that conforms to the format "content" + "artist/style", such as "a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation.". For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). + - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, the style is entirely up to your prompt. + - artist(Optional[str]): Artist name, such as Greg Rutkowski or krenz; the generated image will follow that artist's style. If not provided, the style is entirely up to your [prompt](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, which should preferably be multiples of 64. The larger the size, the longer the computation time. + - seed(Optional[int]): Random seed; different seeds result in different output images. + - output_dir(Optional[str]): Output directory, default is "disco_diffusion_clip_rn101_out". + + + - **Return** + - da(DocumentArray): DocumentArray object, including `n_batches` Documents, each of which keeps all intermediate results during generation; please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m disco_diffusion_clip_rn101 + ``` + + - The serving API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set the CUDA_VISIBLE_DEVICES environment variable before starting the service; otherwise it does not need to be set. + +- ### Step 2: Send a prediction request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result.
+ + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_rn101" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_rn101_out-result.png') + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_rn101_out-result.gif') + + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install disco_diffusion_clip_rn101 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/README_en.md b/modules/image/text_to_image/disco_diffusion_clip_rn50/README_en.md new file mode 100644 index 000000000..89f35cc3f --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/README_en.md @@ -0,0 +1,152 @@ +# disco_diffusion_clip_rn50 + +|Module Name|disco_diffusion_clip_rn50| +| :--- | :---: | +|Category|text to image| +|Network|dd+clip ResNet50| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|2.8GB| +|Latest update date|2022-08-02| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - Output image +

+ +
+ + - Generating process +

+ +
+ +### Module Introduction + +disco_diffusion_clip_rn50 is a text-to-image generation model that can generate images that match the semantics of the sentence you prompt. The model consists of two parts, one is the diffusion model, which is a generative model that reconstructs the original image from the noisy input. The other part is the multimodal pre-training model (CLIP), which can represent text and images in the same feature space, and text and images with similar semantics will be closer in this feature space. In the text image generation model, the diffusion model is responsible for generating the target image from the initial noise or the specified initial image, and CLIP is responsible for guiding the generated image to be as close as possible to the semantics of the input text. Diffusion model under the guidance of CLIP iteratively generates new images, eventually generating images of what the text describes. The CLIP model used in this module is ResNet50. + +For more details, please refer to [Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) and [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install disco_diffusion_clip_rn50 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run disco_diffusion_clip_rn50 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_rn50_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_rn50") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # Output images will be saved in disco_diffusion_clip_rn50_out directory. + # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_rn50_out/') + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_rn50_out-result.png') + # Show all immediate results + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_rn50_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt. 
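+
+  - For example, a call that also pins down the style, artist, output size and random seed might look like the following; the parameter values are illustrative only, and the parameters themselves are described below.
+
+  - ```python
+    import paddlehub as hub
+
+    module = hub.Module(name="disco_diffusion_clip_rn50")
+    # "content" prompt plus explicit style/artist hints; width and height are multiples of 64.
+    da = module.generate_image(
+        text_prompts=["a beautiful painting of Chinese architecture, sunny, super wide angle"],
+        style="watercolor",
+        artist="krenz",
+        width_height=[768, 512],
+        seed=2022,
+        output_dir="./disco_diffusion_clip_rn50_out/")
+    da[0].save_uri_to_file("disco_diffusion_clip_rn50_out-watercolor.png")
+    ```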
+ + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt that conforms to the format "content" + "artist/style", such as "a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation.". For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). + - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, the style is entirely up to your prompt. + - artist(Optional[str]): Artist name, such as Greg Rutkowski or krenz; the generated image will follow that artist's style. If not provided, the style is entirely up to your [prompt](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, which should preferably be multiples of 64. The larger the size, the longer the computation time. + - seed(Optional[int]): Random seed; different seeds result in different output images. + - output_dir(Optional[str]): Output directory, default is "disco_diffusion_clip_rn50_out". + + + - **Return** + - da(DocumentArray): DocumentArray object, including `n_batches` Documents, each of which keeps all intermediate results during generation; please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m disco_diffusion_clip_rn50 + ``` + + - The serving API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set the CUDA_VISIBLE_DEVICES environment variable before starting the service; otherwise it does not need to be set. + +- ### Step 2: Send a prediction request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result.
+ + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_rn50" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_rn50_out-result.png') + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_rn50_out-result.gif') + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install disco_diffusion_clip_rn50 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/README_en.md b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README_en.md new file mode 100644 index 000000000..f53df2688 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/README_en.md @@ -0,0 +1,153 @@ +# disco_diffusion_clip_vitb32 + +|Module Name|disco_diffusion_clip_vitb32| +| :--- | :---: | +|Category|text to image| +|Network|dd+clip ViTB32| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|3.1GB| +|Latest update date|2022-08-02| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." + + - Output image +

+ +
+ + - Generating process +

+ +
+ +### Module Introduction + +disco_diffusion_clip_vitb32 is a text-to-image generation model that can generate images that match the semantics of the sentence you prompt. The model consists of two parts, one is the diffusion model, which is a generative model that reconstructs the original image from the noisy input. The other part is the multimodal pre-training model (CLIP), which can represent text and images in the same feature space, and text and images with similar semantics will be closer in this feature space. In the text image generation model, the diffusion model is responsible for generating the target image from the initial noise or the specified initial image, and CLIP is responsible for guiding the generated image to be as close as possible to the semantics of the input text. Diffusion model under the guidance of CLIP iteratively generates new images, eventually generating images of what the text describes. The CLIP model used in this module is ViTB32. + +For more details, please refer to [Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) and [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install disco_diffusion_clip_vitb32 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run disco_diffusion_clip_vitb32 --text_prompts "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation." --output_dir disco_diffusion_clip_vitb32_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_clip_vitb32") + text_prompts = ["A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."] + # Output images will be saved in disco_diffusion_clip_vitb32_out directory.
+ # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + da = module.generate_image(text_prompts=text_prompts, output_dir='./disco_diffusion_clip_vitb32_out/') + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_vitb32_out-result.png') + # Show all immediate results + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_clip_vitb32_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt.. + + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt conforms to the format "content" + "artist/style", such as "a beautiful painting of Chinese architecture, by krenz, sunny, super wide angle, artstation.". For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). + - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, style is totally up to your prompt. + - artist(Optional[str]): Artist name, such as Greg Rutkowsk, krenz, image style is as whose works you choose. If not provided, style is totally up to your [prompt](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, should be better multiples of 64. The larger size is, the longger computation time is. + - seed(Optional[int]): Random seed, different seeds result in different output images. + - output_dir(Optional[str]): Output directory, default is "disco_diffusion_clip_vitb32_out". + + + - **Return** + - ra(DocumentArray): DocumentArray object, including `n_batches` Documents,each document keeps all immediate results during generation, please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m disco_diffusion_clip_vitb32 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result. 
+ + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_clip_vitb32" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_clip_vitb32_out-result.png') + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_clip_vitb32_out-result.gif') + + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install disco_diffusion_clip_vitb32 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README_en.md b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README_en.md new file mode 100644 index 000000000..79c01c523 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/README_en.md @@ -0,0 +1,153 @@ +# disco_diffusion_cnclip_vitb16 + +|Module Name|disco_diffusion_cnclip_vitb16| +| :--- | :---: | +|Category|text to image| +|Network|dd+cnclip ViTB16| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|2.9GB| +|Latest update date|2022-08-02| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "在宁静的风景中画一幅美丽的建筑画,由Arthur Adams在artstation上所作" + + - Output image +

+ +
+ + - Generating process +

+ +
+ +### Module Introduction + +disco_diffusion_cnclip_vitb16 is a text-to-image generation model that can generate images that match the semantics of the sentence you prompt. The model consists of two parts, one is the diffusion model, which is a generative model that reconstructs the original image from the noisy input. The other part is the multimodal pre-training model (CLIP), which can represent text and images in the same feature space, and text and images with similar semantics will be closer in this feature space. In the text image generation model, the diffusion model is responsible for generating the target image from the initial noise or the specified initial image, and CLIP is responsible for guiding the generated image to be as close as possible to the semantics of the input text. Diffusion model under the guidance of CLIP iteratively generates new images, eventually generating images of what the text describes. The CLIP model used in this module is ViTB16. + +For more details, please refer to [Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) and [Learning Transferable Visual Models From Natural Language Supervision](https://arxiv.org/abs/2103.00020) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install disco_diffusion_cnclip_vitb16 + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run disco_diffusion_cnclip_vitb16 --text_prompts "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作。" --output_dir disco_diffusion_cnclip_vitb16_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_cnclip_vitb16") + text_prompts = ["孤舟蓑笠翁,独钓寒江雪。"] + # Output images will be saved in disco_diffusion_cnclip_vitb16_out directory. + # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + da = module.generate_image(text_prompts=text_prompts, artist='齐白石', output_dir='./disco_diffusion_cnclip_vitb16_out/') + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_cnclip_vitb16_out-result.png') + # Show all immediate results + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_cnclip_vitb16_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt. + + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt conforms to the format "content" + "artist/style", such as "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作". 
For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). + - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, style is totally up to your prompt. + - artist(Optional[str]): Artist name, such as 齐白石,Greg Rutkowsk,image style is as whose works you choose. If not provided, style is totally up to your [prompt](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, should be better multiples of 64. The larger size is, the longger computation time is. + - seed(Optional[int]): Random seed, different seeds result in different output images. + - output_dir(Optional[str]): Output directory, default is "disco_diffusion_cnclip_vitb16_out". + + + - **Return** + - ra(DocumentArray): DocumentArray object, including `n_batches` Documents,each document keeps all immediate results during generation, please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m disco_diffusion_cnclip_vitb16 + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result. + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': '孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_cnclip_vitb16" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_cnclip_vitb16_out-result.png') + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_cnclip_vitb16_out-result.gif') + + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install disco_diffusion_cnclip_vitb16 == 1.0.0 + ``` diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/README_en.md b/modules/image/text_to_image/disco_diffusion_ernievil_base/README_en.md new file mode 100644 index 000000000..041ad0ed6 --- /dev/null +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/README_en.md @@ -0,0 +1,154 @@ +# disco_diffusion_ernievil_base + +|Module Name|disco_diffusion_ernievil_base| +| :--- | :---: | +|Category|text to image| +|Network|dd+ERNIE-ViL| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|2.9GB| +|Latest update date|2022-08-02| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "小桥流水人家" + + - Output image +

+ +
+ + - Generating process +

+ +
+ + +### Module Introduction + +disco_diffusion_ernievil_base is a text-to-image generation model that can generate images that match the semantics of the sentence you prompt. The model consists of two parts, one is the diffusion model, which is a generative model that reconstructs the original image from the noisy input. The other part is the multimodal pre-training model (ERNIE-ViL), which can represent text and images in the same feature space, and text and images with similar semantics will be closer in this feature space. In the text image generation model, the diffusion model is responsible for generating the target image from the initial noise or the specified initial image, and ERNIE-ViL is responsible for guiding the generated image to be as close as possible to the semantics of the input text. Diffusion model under the guidance of ERNIE-ViL iteratively generates new images, eventually generating images of what the text describes. The model used in this module is ERNIE-ViL, consisting of ERNIE 3.0+ViT. + +For more details, please refer to [Diffusion Models Beat GANs on Image Synthesis](https://arxiv.org/abs/2105.05233) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.2.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install disco_diffusion_ernievil_base + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run disco_diffusion_ernievil_base --text_prompts "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作。" --output_dir disco_diffusion_ernievil_base_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="disco_diffusion_ernievil_base") + text_prompts = ["孤舟蓑笠翁,独钓寒江雪。"] + # Output images will be saved in disco_diffusion_ernievil_base_out directory. + # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + da = module.generate_image(text_prompts=text_prompts, artist='齐白石', output_dir='./disco_diffusion_ernievil_base_out/') + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_ernievil_base_out-result.png') + # Show all immediate results + da[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [1280, 768], + seed: Optional[int] = None, + output_dir: Optional[str] = 'disco_diffusion_ernievil_base_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt. + + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt conforms to the format "content" + "artist/style", such as "孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作". For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). 
+ - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, style is totally up to your prompt. + - artist(Optional[str]): Artist name, such as 齐白石,Greg Rutkowsk,image style is as whose works you choose. If not provided, style is totally up to your [prompt](https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, should be better multiples of 64. The larger size is, the longger computation time is. + - seed(Optional[int]): Random seed, different seeds result in different output images. + - output_dir(Optional[str]): Output directory, default is "disco_diffusion_ernievil_base_out". + + + - **Return** + - ra(DocumentArray): DocumentArray object, including `n_batches` Documents,each document keeps all immediate results during generation, please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m disco_diffusion_ernievil_base + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result. + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': '孤舟蓑笠翁,独钓寒江雪。风格如齐白石所作'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/disco_diffusion_ernievil_base" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('disco_diffusion_ernievil_base_out-result.png') + # Save the generating process as a gif + da[0].chunks.save_gif('disco_diffusion_ernievil_base_out-result.gif') + + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install disco_diffusion_ernievil_base == 1.0.0 + ``` diff --git a/modules/image/text_to_image/stable_diffusion/README_en.md b/modules/image/text_to_image/stable_diffusion/README_en.md new file mode 100644 index 000000000..b99b19a52 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/README_en.md @@ -0,0 +1,157 @@ +# stable_diffusion + +|Module Name|stable_diffusion| +| :--- | :---: | +|Category|text to image| +|Network|CLIP Text Encoder+UNet+VAD| +|Dataset|-| +|Fine-tuning supported or not|No| +|Module Size|4.0GB| +|Latest update date|2022-08-26| +|Data indicators|-| + +## I.Basic Information + +### Application Effect Display + + - Prompt "in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation." + + - Output image +

+ +
+ + - Generating process +

+ +
+ +### Module Introduction + +Stable Diffusion is a latent diffusion model (Latent Diffusion), which belongs to the generative model. This kind of model obtains the images by iteratively denoising noise and sampling step by step, and currently has achieved amazing results. Compared with Disco Diffusion, Stable Diffusion iterates in a lower dimensional latent space instead of the original pixel space, which greatly reduces the memory and computational requirements. You can render the desired image within a minute on the V100, welcome to enjoy it in [aistudio](https://aistudio.baidu.com/aistudio/projectdetail/4512600). + +For more details, please refer to [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) + +## II.Installation + +- ### 1.Environmental Dependence + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_en/get_start/installation.rst) + +- ### 2.Installation + + - ```shell + $ hub install stable_diffusion + ``` + - In case of any problems during installation, please refer to:[Windows_Quickstart](../../../../docs/docs_en/get_start/windows_quickstart.md) | [Linux_Quickstart](../../../../docs/docs_en/get_start/linux_quickstart.md) | [Mac_Quickstart](../../../../docs/docs_en/get_start/mac_quickstart.md) + + +## III.Module API Prediction + +- ### 1.Command line Prediction + + - ```shell + $ hub run stable_diffusion --text_prompts "in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation." --output_dir stable_diffusion_out + ``` + +- ### 2.Prediction Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="stable_diffusion") + text_prompts = ["in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation."] + # Output images will be saved in stable_diffusion_out directory. + # The returned da is a DocumentArray object, which contains all immediate and final results + # You can manipulate the DocumentArray object to do post-processing and save images + # you can set batch_size parameter to generate number of batch_size images at one inference step. + da = module.generate_image(text_prompts=text_prompts, batch_size=3, output_dir='./stable_diffusion_out/') + # Show all immediate results + da[0].chunks[-1].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # Save the generating process as a gif + da[0].chunks[-1].chunks.save_gif('stable_diffusion_out-merged-result.gif') + da[0].chunks[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + da[0].chunks[0].chunks.save_gif('stable_diffusion_out-image-0-result.gif') + ``` + +- ### 3.API + + - ```python + def generate_image( + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [512, 512], + seed: Optional[int] = None, + batch_size: Optional[int] = 1, + output_dir: Optional[str] = 'stable_diffusion_out'): + ``` + + - Image generating api, which generates an image corresponding to your prompt. + + - **Parameters** + + - text_prompts(str): Prompt, used to describe your image content. You can construct a prompt conforms to the format "content" + "artist/style", such as "in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.". For more details, you can refer to [website](https://docs.google.com/document/d/1XUT2G9LmkZataHFzmuOtRXnuWBfhvXDAo8DkS--8tec/edit#). 
+ - style(Optional[str]): Image style, such as "watercolor" and "Chinese painting". If not provided, style is totally up to your prompt. + - artist(Optional[str]): Artist name, such as Greg Rutkowsk,krenz, image style is as whose works you choose. If not provided, style is totally up to your prompt.(https://weirdwonderfulai.art/resources/disco-diffusion-70-plus-artist-studies/). + - width_height(Optional[List[int]]): The width and height of output images, should be better multiples of 64. The larger size is, the longger computation time is. + - seed(Optional[int]): Random seed, different seeds result in different output images. + - batch_size(Optional[int]): Number of images generated for one inference step. + - output_dir(Optional[str]): Output directory, default is "stable_diffusion_out". + + + - **Return** + - ra(DocumentArray): DocumentArray object, including `batch_size` Documents,each document keeps all immediate results during generation, please refer to [DocumentArray tutorial](https://docarray.jina.ai/fundamentals/documentarray/index.html) for more details. + +## IV.Server Deployment + +- PaddleHub Serving can deploy an online service of text-to-image. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m stable_diffusion + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before the service, otherwise it need not be set. + +- ### Step 2: Send a predictive request + + - With a configured server, use the following lines of code to send the prediction request and obtain the result. + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + # Send an HTTP request + data = {'text_prompts': 'in the morning light,Overlooking TOKYO city by greg rutkowski and thomas kinkade,Trending on artstation.'} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stable_diffusion" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # Get results + r.json()["results"] + da = DocumentArray.from_base64(r.json()["results"]) + # Save final result image to a file + da[0].save_uri_to_file('stable_diffusion_out.png') + # Save the generating process as a gif + da[0].chunks[0].chunks.save_gif('stable_diffusion_out.gif') + ``` + +## V.Release Note + +* 1.0.0 + + First release + + ```shell + $ hub install stable_diffusion == 1.0.0 + ``` From 87eca2dff7ad25fe2f7fbb8e81f0905205996595 Mon Sep 17 00:00:00 2001 From: chenjian Date: Sat, 17 Sep 2022 01:11:54 +0800 Subject: [PATCH 087/117] add baidu_language_recognition module (#1984) * add baidu_language_recognition module * fix * fix doc * fix * fix doc Co-authored-by: wuzewu Co-authored-by: jm12138 <2286040843@qq.com> --- .../baidu_language_recognition/README.md | 110 ++++++++++++++++++ .../baidu_language_recognition/module.py | 100 ++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 modules/text/machine_translation/baidu_language_recognition/README.md create mode 100644 modules/text/machine_translation/baidu_language_recognition/module.py diff --git a/modules/text/machine_translation/baidu_language_recognition/README.md b/modules/text/machine_translation/baidu_language_recognition/README.md new file mode 100644 index 000000000..faa079be7 --- /dev/null +++ b/modules/text/machine_translation/baidu_language_recognition/README.md @@ -0,0 +1,110 @@ +# 
baidu_language_recognition +|模型名称|baidu_language_recognition| +| :--- | :---: | +|类别|文本-语种识别| +|网络|-| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|-| +|最新更新日期|2022-09-01| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 本模块提供百度翻译开放平台的服务,可支持语种识别。您只需要通过传入文本内容,就可以得到识别出来的语种类别。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.3.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install baidu_language_recognition + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name='baidu_language_recognition') + result = module.recognize("I like panda") + print(result) + ``` + +- ### 2、API + + - ```python + def recognize(query: str) + ``` + + - 语种识别API,输入文本句子,输出识别后的语种编码。 + + - **参数** + + - `query`(str): 待识别的语言。 + + - **返回** + + - `result`(str): 识别的结果,语言的ISO 639-1编码。 + + 目前支持识别的语种如下: +
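+
+ - 下面给出一个批量调用 `recognize` 的简单示意(仅供参考,返回的 ISO 639-1 编码以百度翻译开放平台实际返回为准,例如英文通常为 "en"、中文通常为 "zh"):
+
+ - ```python
+ import paddlehub as hub
+
+ module = hub.Module(name='baidu_language_recognition')
+ # 逐条识别多段文本的语种,recognize 返回 ISO 639-1 语言编码字符串
+ for text in ["I like panda", "我喜欢熊猫"]:
+     print(text, '->', module.recognize(text))
+ ```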

+ + +## 四、服务部署 + +- 通过启动PaddleHub Serving,可以加载模型部署在线语种识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m baidu_language_recognition + ``` + + - 通过以上命令可完成一个语种识别API的部署,默认端口号为8866。 + + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + text = "I like panda" + data = {"query": text} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/baidu_language_recognition" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install baidu_language_recognition==1.0.0 + ``` diff --git a/modules/text/machine_translation/baidu_language_recognition/module.py b/modules/text/machine_translation/baidu_language_recognition/module.py new file mode 100644 index 000000000..e444fe05b --- /dev/null +++ b/modules/text/machine_translation/baidu_language_recognition/module.py @@ -0,0 +1,100 @@ +import argparse +import random +from hashlib import md5 +from typing import Optional + +import requests + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def make_md5(s, encoding='utf-8'): + return md5(s.encode(encoding)).hexdigest() + + +@moduleinfo(name="baidu_language_recognition", + version="1.0.0", + type="text/machine_translation", + summary="", + author="baidu-nlp", + author_email="paddle-dev@baidu.com") +class BaiduLanguageRecognition: + + def __init__(self, appid=None, appkey=None): + """ + :param appid: appid for requesting Baidu translation service. + :param appkey: appkey for requesting Baidu translation service. + """ + # Set your own appid/appkey. + if appid == None: + self.appid = '20201015000580007' + else: + self.appid = appid + if appkey is None: + self.appkey = 'IFJB6jBORFuMmVGDRud1' + else: + self.appkey = appkey + self.url = 'https://fanyi-api.baidu.com/api/trans/vip/language' + + def recognize(self, query: str): + """ + Create image by text prompts using ErnieVilG model. + + :param query: Text to be translated. + + Return language type code. + """ + # Generate salt and sign + salt = random.randint(32768, 65536) + sign = make_md5(self.appid + query + str(salt) + self.appkey) + + # Build request + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + payload = {'appid': self.appid, 'q': query, 'salt': salt, 'sign': sign} + + # Send request + try: + r = requests.post(self.url, params=payload, headers=headers) + result = r.json() + except Exception as e: + error_msg = str(e) + raise RuntimeError(error_msg) + if result['error_code'] != 0: + raise RuntimeError(result['error_msg']) + return result['data']['src'] + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + if args.appid is not None and args.appkey is not None: + self.appid = args.appid + self.appkey = args.appkey + result = self.recognize(args.query) + return result + + @serving + def serving_method(self, query): + """ + Run as a service. + """ + return self.recognize(query) + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--query', type=str) + self.arg_input_group.add_argument('--appid', type=str, default=None, help="注册得到的个人appid") + self.arg_input_group.add_argument('--appkey', type=str, default=None, help="注册得到的个人appkey") From 14ad25465300399aac0d9066fc6df55bb657e719 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 19 Sep 2022 10:01:02 +0800 Subject: [PATCH 088/117] add license for stable diffusion module (#1995) Co-authored-by: jm12138 <2286040843@qq.com> --- .../text_to_image/stable_diffusion/LICENSE | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/image/text_to_image/stable_diffusion/LICENSE diff --git a/modules/image/text_to_image/stable_diffusion/LICENSE b/modules/image/text_to_image/stable_diffusion/LICENSE new file mode 100644 index 000000000..928aa738f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion/LICENSE @@ -0,0 +1,82 @@ +Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors + +CreativeML Open RAIL-M +dated August 22, 2022 + +Section I: PREAMBLE + +Multimodal generative models are being widely adopted and used, and have the potential to transform the way artists, among other individuals, conceive and benefit from AI or ML technologies as a tool for content creation. + +Notwithstanding the current and potential benefits that these artifacts can bring to society at large, there are also concerns about potential misuses of them, either due to their technical limitations or ethical considerations. + +In short, this license strives for both the open and responsible downstream use of the accompanying model. When it comes to the open character, we took inspiration from open source permissive licenses regarding the grant of IP rights. Referring to the downstream responsible use, we added use-based restrictions not permitting the use of the Model in very specific scenarios, in order for the licensor to be able to enforce the license in case potential misuses of the Model may occur. At the same time, we strive to promote open and responsible research on generative models for art and content generation. + +Even though downstream derivative versions of the model could be released under different licensing terms, the latter will always have to include - at minimum - the same use-based restrictions as the ones in the original license (this license). We believe in the intersection between open and responsible AI development; thus, this License aims to strike a balance between both in order to enable responsible open-science in the field of AI. + +This License governs the use of the model (and its derivatives) and is informed by the model card associated with the model. + +NOW THEREFORE, You and Licensor agree as follows: + +1. Definitions + +- "License" means the terms and conditions for use, reproduction, and Distribution as defined in this document. +- "Data" means a collection of information and/or content extracted from the dataset used with the Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not licensed under this License. 
+- "Output" means the results of operating a Model as embodied in informational content resulting therefrom. +- "Model" means any accompanying machine-learning based assemblies (including checkpoints), consisting of learnt weights, parameters (including optimizer states), corresponding to the model architecture as embodied in the Complementary Material, that have been trained or tuned, in whole or in part on the Data, using the Complementary Material. +- "Derivatives of the Model" means all modifications to the Model, works based on the Model, or any other model which is created or initialized by transfer of patterns of the weights, parameters, activations or output of the Model, to the other model, in order to cause the other model to perform similarly to the Model, including - but not limited to - distillation methods entailing the use of intermediate data representations or methods based on the generation of synthetic data by the Model for training the other model. +- "Complementary Material" means the accompanying source code and scripts used to define, run, load, benchmark or evaluate the Model, and used to prepare data for training or evaluation, if any. This includes any accompanying documentation, tutorials, examples, etc, if any. +- "Distribution" means any transmission, reproduction, publication or other sharing of the Model or Derivatives of the Model to a third party, including providing the Model as a hosted service made available by electronic or other remote means - e.g. API-based or web access. +- "Licensor" means the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model. +- "You" (or "Your") means an individual or Legal Entity exercising permissions granted by this License and/or making use of the Model for whichever purpose and in any field of use, including usage of the Model in an end-use application - e.g. chatbot, translator, image generator. +- "Third Parties" means individuals or legal entities that are not under common control with Licensor or You. +- "Contribution" means any work of authorship, including the original version of the Model and any modifications or additions to that Model or Derivatives of the Model thereof, that is intentionally submitted to Licensor for inclusion in the Model by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Model, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." +- "Contributor" means Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Model. + +Section II: INTELLECTUAL PROPERTY RIGHTS + +Both copyright and patent grants apply to the Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of the Model are subject to additional terms as described in Section III. + +2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly display, publicly perform, sublicense, and distribute the Complementary Material, the Model, and Derivatives of the Model. +3. Grant of Patent License. Subject to the terms and conditions of this License and where and as applicable, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this paragraph) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Model and the Complementary Material, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Model to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution incorporated within the Model and/or Complementary Material constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for the Model and/or Work shall terminate as of the date such litigation is asserted or filed. + +Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION + +4. Distribution and Redistribution. You may host for Third Party remote access purposes (e.g. software-as-a-service), reproduce and distribute copies of the Model or Derivatives of the Model thereof in any medium, with or without modifications, provided that You meet the following conditions: +Use-based restrictions as referenced in paragraph 5 MUST be included as an enforceable provision by You in any type of legal agreement (e.g. a license) governing the use and/or distribution of the Model or Derivatives of the Model, and You shall give notice to subsequent users You Distribute to, that the Model or Derivatives of the Model are subject to paragraph 5. This provision does not apply to the use of Complementary Material. +You must give any Third Party recipients of the Model or Derivatives of the Model a copy of this License; +You must cause any modified files to carry prominent notices stating that You changed the files; +You must retain all copyright, patent, trademark, and attribution notices excluding those notices that do not pertain to any part of the Model, Derivatives of the Model. +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions - respecting paragraph 4.a. - for use, reproduction, or Distribution of Your modifications, or for any such Derivatives of the Model as a whole, provided Your use, reproduction, and Distribution of the Model otherwise complies with the conditions stated in this License. +5. Use-based restrictions. The restrictions set forth in Attachment A are considered Use-based restrictions. Therefore You cannot use the Model and the Derivatives of the Model for the specified restricted uses. You may use the Model subject to this License, including only for lawful purposes and in accordance with the License. Use may include creating any content with, finetuning, updating, running, training, evaluating and/or reparametrizing the Model. 
You shall require all of Your users who use the Model or a Derivative of the Model to comply with the terms of this paragraph (paragraph 5). +6. The Output You Generate. Except as set forth herein, Licensor claims no rights in the Output You generate using the Model. You are accountable for the Output you generate and its subsequent uses. No use of the output can contravene any provision as stated in the License. + +Section IV: OTHER PROVISIONS + +7. Updates and Runtime Restrictions. To the maximum extent permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage of the Model in violation of this License, update the Model through electronic means, or modify the Output of the Model based on updates. You shall undertake reasonable efforts to use the latest version of the Model. +8. Trademarks and related. Nothing in this License permits You to make use of Licensors’ trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between the parties; and any rights not expressly granted herein are reserved by the Licensors. +9. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Model and the Complementary Material (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Model, Derivatives of the Model, and the Complementary Material and assume any risks associated with Your exercise of permissions under this License. +10. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Model and the Complementary Material (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. +11. Accepting Warranty or Additional Liability. While redistributing the Model, Derivatives of the Model and the Complementary Material thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. +12. If any provision of this License is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein. 
+ +END OF TERMS AND CONDITIONS + + + + +Attachment A + +Use Restrictions + +You agree not to use the Model or Derivatives of the Model: +- In any way that violates any applicable national, federal, state, local or international law or regulation; +- For the purpose of exploiting, harming or attempting to exploit or harm minors in any way; +- To generate or disseminate verifiably false information and/or content with the purpose of harming others; +- To generate or disseminate personal identifiable information that can be used to harm an individual; +- To defame, disparage or otherwise harass others; +- For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation; +- For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics; +- To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm; +- For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories; +- To provide medical advice and medical results interpretation; +- To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use). \ No newline at end of file From d1b2da280c3443bea32539812d997f87a4072cdf Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Thu, 22 Sep 2022 14:30:36 +0800 Subject: [PATCH 089/117] fix ocr det (#2035) * fix ocr det * fix default thres value * fix readme --- .../ch_pp-ocrv3_det/README.md | 2 +- .../ch_pp-ocrv3_det/module.py | 26 +++++++++++++------ .../ch_pp-ocrv3_det/processor.py | 2 +- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/README.md b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md index d1bf63bfb..495d53586 100755 --- a/modules/image/text_recognition/ch_pp-ocrv3_det/README.md +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/README.md @@ -58,7 +58,7 @@ ``` - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/module.py b/modules/image/text_recognition/ch_pp-ocrv3_det/module.py index 52d50a60e..331f2dafd 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3_det/module.py +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/module.py @@ -168,8 +168,9 @@ def detect_text(self, use_gpu=False, output_dir='detection_result', visualization=False, - box_thresh=0.5, - det_db_unclip_ratio=1.5): + box_thresh=0.6, + det_db_unclip_ratio=1.5, + det_db_score_mode="fast"): """ Get the text box in the predicted images. Args: @@ -180,6 +181,7 @@ def detect_text(self, visualization (bool): Whether to save image or not. 
box_thresh(float): the threshold of the detected text box's confidence det_db_unclip_ratio(float): unclip ratio for post processing in DB detection. + det_db_score_mode(str): method to calc the final det score, one of fast(using box) and slow(using poly). Returns: res (list): The result of text detection box and save path of images. """ @@ -206,12 +208,14 @@ def detect_text(self, assert predicted_data != [], "There is not any image to be predicted. Please check the input data." preprocessor = DBProcessTest(params={'max_side_len': 960}) - postprocessor = DBPostProcess(params={ - 'thresh': 0.3, - 'box_thresh': 0.6, - 'max_candidates': 1000, - 'unclip_ratio': det_db_unclip_ratio - }) + postprocessor = DBPostProcess( + params={ + 'thresh': 0.3, + 'box_thresh': 0.6, + 'max_candidates': 1000, + 'unclip_ratio': det_db_unclip_ratio, + 'det_db_score_mode': det_db_score_mode, + }) all_imgs = [] all_ratios = [] @@ -288,6 +292,7 @@ def run_cmd(self, argvs): use_gpu=args.use_gpu, output_dir=args.output_dir, det_db_unclip_ratio=args.det_db_unclip_ratio, + det_db_score_mode=args.det_db_score_mode, visualization=args.visualization) return results @@ -311,6 +316,11 @@ def add_module_config_arg(self): type=float, default=1.5, help="unclip ratio for post processing in DB detection.") + self.arg_config_group.add_argument( + '--det_db_score_mode', + type=str, + default="str", + help="method to calc the final det score, one of fast(using box) and slow(using poly).") def add_module_input_arg(self): """ diff --git a/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py b/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py index 46a3b2638..7854e7f06 100644 --- a/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py +++ b/modules/image/text_recognition/ch_pp-ocrv3_det/processor.py @@ -124,9 +124,9 @@ def __init__(self, params): self.box_thresh = params['box_thresh'] self.max_candidates = params['max_candidates'] self.unclip_ratio = params['unclip_ratio'] + self.score_mode = params['det_db_score_mode'] self.min_size = 3 self.dilation_kernel = None - self.score_mode = 'fast' def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): ''' From 98d598b7fe14ddca68f8107a66a1f8a3e4ce2bd8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 23 Sep 2022 17:42:16 +0800 Subject: [PATCH 090/117] Add LSeg Module (#2038) * add LSeg * add LSeg README * add requirements.txt * update README * update module * update * update * update * update * pre-commit * update * save jpg -> save png * bgr -> bgra * fix typo * pre-commit --- .../semantic_segmentation/lseg/README.md | 178 ++++++++++ .../lseg/models/__init__.py | 3 + .../semantic_segmentation/lseg/models/clip.py | 45 +++ .../semantic_segmentation/lseg/models/lseg.py | 19 ++ .../lseg/models/scratch.py | 318 ++++++++++++++++++ .../semantic_segmentation/lseg/models/vit.py | 228 +++++++++++++ .../semantic_segmentation/lseg/module.py | 194 +++++++++++ .../lseg/requirements.txt | 4 + .../image/semantic_segmentation/lseg/test.py | 67 ++++ 9 files changed, 1056 insertions(+) create mode 100644 modules/image/semantic_segmentation/lseg/README.md create mode 100644 modules/image/semantic_segmentation/lseg/models/__init__.py create mode 100644 modules/image/semantic_segmentation/lseg/models/clip.py create mode 100644 modules/image/semantic_segmentation/lseg/models/lseg.py create mode 100644 modules/image/semantic_segmentation/lseg/models/scratch.py create mode 100644 modules/image/semantic_segmentation/lseg/models/vit.py create mode 100644 
modules/image/semantic_segmentation/lseg/module.py create mode 100644 modules/image/semantic_segmentation/lseg/requirements.txt create mode 100644 modules/image/semantic_segmentation/lseg/test.py diff --git a/modules/image/semantic_segmentation/lseg/README.md b/modules/image/semantic_segmentation/lseg/README.md new file mode 100644 index 000000000..63a929314 --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/README.md @@ -0,0 +1,178 @@ +# lseg + +|模型名称|lseg| +| :--- | :---: | +|类别|图像-图像分割| +|网络|LSeg| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|1.63GB| +|指标|-| +|最新更新日期|2022-09-22| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +
+    （网络结构示意图）
+
+  - 样例结果示例:
+    （分割样例结果示意图）
+ +- ### 模型介绍 + + - 文本驱动的图像语义分割模型(Language-driven Semantic Segmentation),即通过文本控制模型的分割类别实现指定类别的图像语义分割算法。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install lseg + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run lseg \ + --input_path "/PATH/TO/IMAGE" \ + --labels "Category 1" "Category 2" "Category n" \ + --output_dir "lseg_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="lseg") + result = module.segment( + image=cv2.imread('/PATH/TO/IMAGE'), + labels=["Category 1", "Category 2", "Category n"], + visualization=True, + output_dir='lseg_output' + ) + ``` + + - ### 3、API + + ```python + def segment( + image: Union[str, numpy.ndarray], + labels: Union[str, List[str]], + visualization: bool = False, + output_dir: str = 'lseg_output' + ) -> Dict[str, Union[numpy.ndarray, Dict[str, numpy.ndarray]]] + ``` + + - 语义分割 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * labels (Union\[str, List\[str\]\]): 类别文本标签; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (Dict\[str, Union\[numpy.ndarray, Dict\[str, numpy.ndarray\]\]\]): 识别结果的字典,字典中包含如下元素: + * gray (numpy.ndarray): 灰度分割结果 (GRAY); + * color (numpy.ndarray): 伪彩色图分割结果 (BGR); + * mix (numpy.ndarray): 叠加原图和伪彩色图的分割结果 (BGR); + * classes (Dict\[str, numpy.ndarray\]): 各个类别标签的分割抠图结果 (BGRA)。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个语义驱动的语义分割的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m lseg + ``` + + - 这样就完成了一个语义驱动的语义分割服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im), + 'labels': ["Category 1", "Category 2", "Category n"] + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/lseg" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = { + 'gray': base64_to_cv2(results['gray']), + 'color': base64_to_cv2(results['color']), + 'mix': base64_to_cv2(results['mix']), + 'classes': { + k: base64_to_cv2(v) for k, v in results['classes'].items() + } + } + + # 保存输出 + cv2.imwrite('mix.jpg', results['mix']) + ``` + +## 五、参考资料 + +* 论文:[Language-driven Semantic Segmentation](https://arxiv.org/abs/2201.03546) + +* 官方实现:[isl-org/lang-seg](https://github.com/isl-org/lang-seg) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install lseg==1.0.0 + ``` diff --git a/modules/image/semantic_segmentation/lseg/models/__init__.py b/modules/image/semantic_segmentation/lseg/models/__init__.py new file mode 100644 index 000000000..7718276ca --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/models/__init__.py @@ 
-0,0 +1,3 @@ +from .lseg import LSeg + +__all__ = ['LSeg'] diff --git a/modules/image/semantic_segmentation/lseg/models/clip.py b/modules/image/semantic_segmentation/lseg/models/clip.py new file mode 100644 index 000000000..791f3c4b0 --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/models/clip.py @@ -0,0 +1,45 @@ +import paddle +import paddle.nn as nn +from paddlenlp.transformers.clip.modeling import TextTransformer + + +class CLIPText(nn.Layer): + + def __init__(self, + max_text_length: int = 77, + vocab_size: int = 49408, + text_embed_dim: int = 512, + text_heads: int = 8, + text_layers: int = 12, + text_hidden_act: str = "quick_gelu", + projection_dim: int = 512): + super().__init__() + + self.text_model = TextTransformer(context_length=max_text_length, + transformer_width=text_embed_dim, + transformer_heads=text_heads, + transformer_layers=text_layers, + vocab_size=vocab_size, + activation=text_hidden_act, + normalize_before=True) + + self.text_projection = paddle.create_parameter((text_embed_dim, projection_dim), paddle.get_default_dtype()) + + def get_text_features( + self, + input_ids, + attention_mask=None, + position_ids=None, + output_attentions=False, + output_hidden_states=False, + return_dict=False, + ): + text_outputs = self.text_model(input_ids=input_ids, + position_ids=position_ids, + attention_mask=attention_mask, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict) + pooled_output = text_outputs[1] + text_features = paddle.matmul(pooled_output, self.text_projection) + return text_features diff --git a/modules/image/semantic_segmentation/lseg/models/lseg.py b/modules/image/semantic_segmentation/lseg/models/lseg.py new file mode 100644 index 000000000..f2ace02bd --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/models/lseg.py @@ -0,0 +1,19 @@ +import paddle.nn as nn + +from .clip import CLIPText +from .scratch import Scratch +from .vit import ViT + + +class LSeg(nn.Layer): + + def __init__(self): + super().__init__() + self.clip = CLIPText() + self.vit = ViT() + self.scratch = Scratch() + + def forward(self, images, texts): + layer_1, layer_2, layer_3, layer_4 = self.vit.forward(images) + text_features = self.clip.get_text_features(texts) + return self.scratch.forward(layer_1, layer_2, layer_3, layer_4, text_features) diff --git a/modules/image/semantic_segmentation/lseg/models/scratch.py b/modules/image/semantic_segmentation/lseg/models/scratch.py new file mode 100644 index 000000000..3e4074614 --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/models/scratch.py @@ -0,0 +1,318 @@ +import numpy as np +import paddle +import paddle.nn as nn + + +class Interpolate(nn.Layer): + """Interpolation module.""" + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, + scale_factor=self.scale_factor, + mode=self.mode, + align_corners=self.align_corners, + ) + + return x + + +class ResidualConvUnit(nn.Layer): + """Residual convolution module.""" + + def __init__(self, features): + """Init. 
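+
+        Plain residual unit: ReLU -> 3x3 conv -> ReLU -> 3x3 conv, with the
+        input added back onto the result (pre-activation residual block).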
+ + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2D(features, features, kernel_size=3, stride=1, padding=1) + + self.conv2 = nn.Conv2D(features, features, kernel_size=3, stride=1, padding=1) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Layer): + """Feature fusion block.""" + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate(output, scale_factor=2, mode="bilinear", align_corners=True) + + return output + + +class ResidualConvUnit_custom(nn.Layer): + """Residual convolution module.""" + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups = 1 + + self.conv1 = nn.Conv2D( + features, + features, + kernel_size=3, + stride=1, + padding=1, + bias_attr=not self.bn, + groups=self.groups, + ) + + self.conv2 = nn.Conv2D( + features, + features, + kernel_size=3, + stride=1, + padding=1, + bias_attr=not self.bn, + groups=self.groups, + ) + + if self.bn == True: + self.bn1 = nn.BatchNorm2D(features) + self.bn2 = nn.BatchNorm2D(features) + + self.activation = activation + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn == True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn == True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return out + x + + +class FeatureFusionBlock_custom(nn.Layer): + """Feature fusion block.""" + + def __init__( + self, + features, + activation=nn.ReLU(), + deconv=False, + bn=False, + expand=False, + align_corners=True, + ): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups = 1 + + self.expand = expand + out_features = features + if self.expand == True: + out_features = features // 2 + + self.out_conv = nn.Conv2D( + features, + out_features, + kernel_size=1, + stride=1, + padding=0, + bias_attr=True, + groups=1, + ) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + def forward(self, *xs): + """Forward pass. 
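+
+        Fuses the incoming decoder feature (xs[0]) with an optional skip
+        feature (xs[1]), refines the sum, upsamples it 2x and projects it
+        with a 1x1 convolution.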
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output += res + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate(output, scale_factor=2, mode="bilinear", align_corners=self.align_corners) + + output = self.out_conv(output) + + return output + + +class Scratch(nn.Layer): + + def __init__(self, in_channels=[256, 512, 1024, 1024], out_channels=256): + super().__init__() + self.out_c = 512 + self.logit_scale = paddle.to_tensor(np.exp(np.log([1 / 0.07]))) + self.layer1_rn = nn.Conv2D( + in_channels[0], + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=1, + ) + self.layer2_rn = nn.Conv2D( + in_channels[1], + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=1, + ) + self.layer3_rn = nn.Conv2D( + in_channels[2], + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=1, + ) + self.layer4_rn = nn.Conv2D( + in_channels[3], + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=1, + ) + + self.refinenet1 = FeatureFusionBlock_custom(out_channels, bn=True) + self.refinenet2 = FeatureFusionBlock_custom(out_channels, bn=True) + self.refinenet3 = FeatureFusionBlock_custom(out_channels, bn=True) + self.refinenet4 = FeatureFusionBlock_custom(out_channels, bn=True) + + self.head1 = nn.Conv2D(out_channels, self.out_c, kernel_size=1) + + self.output_conv = nn.Sequential(Interpolate(scale_factor=2, mode="bilinear", align_corners=True)) + + def forward(self, layer_1, layer_2, layer_3, layer_4, text_features): + + layer_1_rn = self.layer1_rn(layer_1) + layer_2_rn = self.layer2_rn(layer_2) + layer_3_rn = self.layer3_rn(layer_3) + layer_4_rn = self.layer4_rn(layer_4) + + path_4 = self.refinenet4(layer_4_rn) + path_3 = self.refinenet3(path_4, layer_3_rn) + path_2 = self.refinenet2(path_3, layer_2_rn) + path_1 = self.refinenet1(path_2, layer_1_rn) + + image_features = self.head1(path_1) + + imshape = image_features.shape + image_features = image_features.transpose((0, 2, 3, 1)).reshape((-1, self.out_c)) + + # normalized features + image_features = image_features / image_features.norm(axis=-1, keepdim=True) + text_features = text_features / text_features.norm(axis=-1, keepdim=True) + + logits_per_image = self.logit_scale * image_features @ text_features.t() + + out = logits_per_image.reshape((imshape[0], imshape[2], imshape[3], -1)).transpose((0, 3, 1, 2)) + + out = self.output_conv(out) + + return out diff --git a/modules/image/semantic_segmentation/lseg/models/vit.py b/modules/image/semantic_segmentation/lseg/models/vit.py new file mode 100644 index 000000000..75c5d0191 --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/models/vit.py @@ -0,0 +1,228 @@ +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddleclas.ppcls.arch.backbone.model_zoo.vision_transformer import VisionTransformer + + +class Slice(nn.Layer): + + def __init__(self, start_index=1): + super(Slice, self).__init__() + self.start_index = start_index + + def forward(self, x): + return x[:, self.start_index:] + + +class AddReadout(nn.Layer): + + def __init__(self, start_index=1): + super(AddReadout, self).__init__() + self.start_index = start_index + + def forward(self, x): + if self.start_index == 2: + readout = (x[:, 0] + x[:, 1]) / 2 + else: + readout = x[:, 0] + return x[:, self.start_index:] + readout.unsqueeze(1) + + +class Transpose(nn.Layer): + + def __init__(self, dim0, dim1): 
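+        # Remember the two axes to swap; forward() builds the full permutation
+        # that paddle's transpose expects and applies it.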
+ super(Transpose, self).__init__() + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + prems = list(range(x.dim())) + prems[self.dim0], prems[self.dim1] = prems[self.dim1], prems[self.dim0] + x = x.transpose(prems) + return x + + +class Unflatten(nn.Layer): + + def __init__(self, start_axis, shape): + super(Unflatten, self).__init__() + self.start_axis = start_axis + self.shape = shape + + def forward(self, x): + return paddle.reshape(x, x.shape[:self.start_axis] + [self.shape]) + + +class ProjectReadout(nn.Layer): + + def __init__(self, in_features, start_index=1): + super(ProjectReadout, self).__init__() + self.start_index = start_index + + self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU()) + + def forward(self, x): + readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index:]) + features = paddle.concat((x[:, self.start_index:], readout), -1) + + return self.project(features) + + +class ViT(VisionTransformer): + + def __init__(self, + img_size=384, + patch_size=16, + in_chans=3, + class_num=1000, + embed_dim=1024, + depth=24, + num_heads=16, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0, + attn_drop_rate=0, + drop_path_rate=0, + norm_layer='nn.LayerNorm', + epsilon=1e-6, + **kwargs): + super().__init__(img_size, patch_size, in_chans, class_num, embed_dim, depth, num_heads, mlp_ratio, qkv_bias, + qk_scale, drop_rate, attn_drop_rate, drop_path_rate, norm_layer, epsilon, **kwargs) + self.patch_size = patch_size + self.start_index = 1 + features = [256, 512, 1024, 1024] + readout_oper = [ProjectReadout(embed_dim, self.start_index) for out_feat in features] + self.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + Unflatten(2, [img_size // 16, img_size // 16]), + nn.Conv2D( + in_channels=embed_dim, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2DTranspose( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + dilation=1, + groups=1, + ), + ) + + self.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + Unflatten(2, [img_size // 16, img_size // 16]), + nn.Conv2D( + in_channels=embed_dim, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2DTranspose( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + dilation=1, + groups=1, + ), + ) + + self.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + Unflatten(2, [img_size // 16, img_size // 16]), + nn.Conv2D( + in_channels=embed_dim, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + self.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + Unflatten(2, [img_size // 16, img_size // 16]), + nn.Conv2D( + in_channels=embed_dim, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2D( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + self.norm = nn.Identity() + self.head = nn.Identity() + + def _resize_pos_embed(self, posemb, gs_h, gs_w): + posemb_tok, posemb_grid = ( + posemb[:, :self.start_index], + posemb[0, self.start_index:], + ) + + gs_old = int(math.sqrt(len(posemb_grid))) + + posemb_grid = posemb_grid.reshape((1, gs_old, gs_old, -1)).transpose((0, 3, 1, 2)) + posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear") + posemb_grid = posemb_grid.transpose((0, 2, 3, 1)).reshape((1, gs_h * gs_w, 
-1)) + + posemb = paddle.concat([posemb_tok, posemb_grid], axis=1) + + return posemb + + def forward(self, x): + b, c, h, w = x.shape + + pos_embed = self._resize_pos_embed(self.pos_embed, h // self.patch_size, w // self.patch_size) + x = self.patch_embed.proj(x).flatten(2).transpose((0, 2, 1)) + + cls_tokens = self.cls_token.expand((b, -1, -1)) + x = paddle.concat((cls_tokens, x), axis=1) + + x = x + pos_embed + x = self.pos_drop(x) + + outputs = [] + for index, blk in enumerate(self.blocks): + x = blk(x) + if index in [5, 11, 17, 23]: + outputs.append(x) + + layer_1 = self.act_postprocess1[0:2](outputs[0]) + layer_2 = self.act_postprocess2[0:2](outputs[1]) + layer_3 = self.act_postprocess3[0:2](outputs[2]) + layer_4 = self.act_postprocess4[0:2](outputs[3]) + + shape = (-1, 1024, h // self.patch_size, w // self.patch_size) + layer_1 = layer_1.reshape(shape) + layer_2 = layer_2.reshape(shape) + layer_3 = layer_3.reshape(shape) + layer_4 = layer_4.reshape(shape) + + layer_1 = self.act_postprocess1[3:len(self.act_postprocess1)](layer_1) + layer_2 = self.act_postprocess2[3:len(self.act_postprocess2)](layer_2) + layer_3 = self.act_postprocess3[3:len(self.act_postprocess3)](layer_3) + layer_4 = self.act_postprocess4[3:len(self.act_postprocess4)](layer_4) + + return layer_1, layer_2, layer_3, layer_4 diff --git a/modules/image/semantic_segmentation/lseg/module.py b/modules/image/semantic_segmentation/lseg/module.py new file mode 100644 index 000000000..55ba891ee --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/module.py @@ -0,0 +1,194 @@ +import argparse +import base64 +import os +import time +from typing import Dict +from typing import List +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.vision.transforms as transforms +from paddlenlp.transformers.clip.tokenizer import CLIPTokenizer + +import paddlehub as hub +from . import models +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name='lseg', + version='1.0.0', + type="CV/semantic_segmentation", + author="", + author_email="", + summary="Language-driven Semantic Segmentation.", +) +class LSeg(models.LSeg): + + def __init__(self): + super(LSeg, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, 'ckpts', 'LSeg.pdparams') + state_dict = paddle.load(self.default_pretrained_model_path) + self.set_state_dict(state_dict) + self.eval() + self.transforms = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]), + ]) + self.tokenizer = CLIPTokenizer.from_pretrained('openai/clip-vit-base-patch32') + + self.language_recognition = hub.Module(name='baidu_language_recognition') + self.translate = hub.Module(name='baidu_translate') + + @staticmethod + def get_colormap(n): + assert n <= 256, "num_class should be less than 256." 
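+        # PASCAL VOC style palette: each class index j spreads its bits across
+        # the three colour channels from the most significant bit down, e.g.
+        # j=1 -> (128, 0, 0), j=2 -> (0, 128, 0), j=3 -> (128, 128, 0), so
+        # neighbouring class ids stay visually distinct.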
+ + pallete = [0] * (256 * 3) + + for j in range(0, n): + lab = j + pallete[j * 3 + 0] = 0 + pallete[j * 3 + 1] = 0 + pallete[j * 3 + 2] = 0 + i = 0 + while (lab > 0): + pallete[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) + pallete[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) + pallete[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) + i = i + 1 + lab >>= 3 + + return np.asarray(pallete, dtype=np.uint8).reshape(256, 1, 3) + + def segment(self, + image: Union[str, np.ndarray], + labels: Union[str, List[str]], + visualization: bool = False, + output_dir: str = 'lseg_output') -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: + if isinstance(image, str): + image = cv2.imread(image) + elif isinstance(image, np.ndarray): + image = image + else: + raise Exception("image should be a str / np.ndarray") + + if isinstance(labels, str): + labels = [labels, 'other'] + print('"other" category label is automatically added because the length of labels is equal to 1') + print('new labels: ', labels) + elif isinstance(labels, list): + if len(labels) == 1: + labels.append('other') + print('"other" category label is automatically added because the length of labels is equal to 1') + print('new labels: ', labels) + elif len(labels) == 0: + raise Exception("labels should not be empty.") + else: + raise Exception("labels should be a str or list.") + + class_num = len(labels) + + labels_ = list(set(labels)) + labels_.sort(key=labels.index) + labels = labels_ + + input_labels = [] + for label in labels: + from_lang = self.language_recognition.recognize(query=label) + if from_lang != 'en': + label = self.translate.translate(query=label, from_lang=from_lang, to_lang='en') + input_labels.append(label) + + labels_dict = {k: v for k, v in zip(input_labels, labels)} + + input_labels_ = list(set(input_labels)) + input_labels_.sort(key=input_labels.index) + input_labels = input_labels_ + + labels = [] + for input_label in input_labels: + labels.append(labels_dict[input_label]) + + if len(labels) < class_num: + print('remove the same labels...') + print('new labels: ', labels) + + h, w = image.shape[:2] + image = image[:-(h % 32) if h % 32 else None, :-(w % 32) if w % 32 else None] + images = self.transforms(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).unsqueeze(0) + texts = self.tokenizer(input_labels, padding=True, return_tensors="pd")['input_ids'] + + with paddle.no_grad(): + results = self.forward(images, texts) + results = paddle.argmax(results, 1).cast(paddle.uint8) + gray_seg = results.numpy()[0] + + colormap = self.get_colormap(len(labels)) + color_seg = cv2.applyColorMap(gray_seg, colormap) + mix_seg = cv2.addWeighted(image, 0.5, color_seg, 0.5, 0.0) + + classes_seg = {} + for i, label in enumerate(labels): + mask = ((gray_seg == i).astype('uint8') * 255)[..., None] + classes_seg[label] = np.concatenate([image, mask], 2) + + if visualization: + save_dir = os.path.join(output_dir, str(int(time.time()))) + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + for label, dst in classes_seg.items(): + cv2.imwrite(os.path.join(save_dir, '%s.png' % label), dst) + + return {'gray': gray_seg, 'color': color_seg, 'mix': mix_seg, 'classes': classes_seg} + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. 
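+        Parses --input_path, --labels and --output_dir, then calls segment()
+        with visualization enabled so the results are written to output_dir.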
+ """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="path to image.") + self.parser.add_argument('--labels', type=str, nargs='+', help="segmentation labels.") + self.parser.add_argument('--output_dir', + type=str, + default='lseg_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.segment(image=args.input_path, labels=args.labels, visualization=True, output_dir=args.output_dir) + return 'segmentation results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. + """ + image = base64_to_cv2(image) + results = self.segment(image=image, **kwargs) + + return { + 'gray': cv2_to_base64(results['gray']), + 'color': cv2_to_base64(results['color']), + 'mix': cv2_to_base64(results['mix']), + 'classes': {k: cv2_to_base64(v) + for k, v in results['classes'].items()} + } diff --git a/modules/image/semantic_segmentation/lseg/requirements.txt b/modules/image/semantic_segmentation/lseg/requirements.txt new file mode 100644 index 000000000..1bd663bde --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/requirements.txt @@ -0,0 +1,4 @@ +paddleclas>=2.4.0 +paddlenlp>=2.4.0 +ftfy +regex diff --git a/modules/image/semantic_segmentation/lseg/test.py b/modules/image/semantic_segmentation/lseg/test.py new file mode 100644 index 000000000..d6860e608 --- /dev/null +++ b/modules/image/semantic_segmentation/lseg/test.py @@ -0,0 +1,67 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
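+        # Cache the downloaded image at tests/test.jpg so each test case below
+        # can read the same input from disk.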
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="lseg") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('lseg_output') + + def test_segment1(self): + results = self.module.segment(image='tests/test.jpg', labels=['other', 'cat'], visualization=False) + + self.assertIsInstance(results['mix'], np.ndarray) + self.assertIsInstance(results['color'], np.ndarray) + self.assertIsInstance(results['gray'], np.ndarray) + self.assertIsInstance(results['classes']['other'], np.ndarray) + self.assertIsInstance(results['classes']['cat'], np.ndarray) + + def test_segment2(self): + results = self.module.segment(image=cv2.imread('tests/test.jpg'), labels=['other', 'cat'], visualization=True) + + self.assertIsInstance(results['mix'], np.ndarray) + self.assertIsInstance(results['color'], np.ndarray) + self.assertIsInstance(results['gray'], np.ndarray) + self.assertIsInstance(results['classes']['other'], np.ndarray) + self.assertIsInstance(results['classes']['cat'], np.ndarray) + + def test_segment3(self): + results = self.module.segment(image=cv2.imread('tests/test.jpg'), labels=['其他', '猫'], visualization=False) + + self.assertIsInstance(results['mix'], np.ndarray) + self.assertIsInstance(results['color'], np.ndarray) + self.assertIsInstance(results['gray'], np.ndarray) + self.assertIsInstance(results['classes']['其他'], np.ndarray) + self.assertIsInstance(results['classes']['猫'], np.ndarray) + + def test_segment4(self): + self.assertRaises(Exception, self.module.segment, image=['tests/test.jpg'], labels=['other', 'cat']) + + def test_segment5(self): + self.assertRaises(AttributeError, self.module.segment, image='no.jpg', labels=['other', 'cat']) + + +if __name__ == "__main__": + unittest.main() From 8d97ef3c83249441277f7c960438aa478c265f34 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 10 Oct 2022 15:34:33 +0800 Subject: [PATCH 091/117] Add stable_diffusion_img2img module --- .../stable_diffusion_img2img/LICENSE | 82 + .../stable_diffusion_img2img/README.md | 171 ++ .../stable_diffusion_img2img/clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../clip/clip/model.py | 259 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../clip/clip/utils.py | 88 + .../diffusers/__init__.py | 20 + .../diffusers/configuration_utils.py | 312 ++++ .../diffusers/models/README.md | 11 + .../diffusers/models/__init__.py | 20 + .../diffusers/models/attention.py | 465 ++++++ .../diffusers/models/embeddings.py | 116 ++ .../diffusers/models/resnet.py | 515 ++++++ .../diffusers/models/unet_2d.py | 206 +++ .../diffusers/models/unet_2d_condition.py | 206 +++ .../diffusers/models/unet_blocks.py | 1428 +++++++++++++++++ .../diffusers/models/vae.py | 465 ++++++ .../diffusers/schedulers/README.md | 18 + .../diffusers/schedulers/__init__.py | 24 + .../diffusers/schedulers/scheduling_ddim.py | 182 +++ .../diffusers/schedulers/scheduling_ddpm.py | 191 +++ .../schedulers/scheduling_karras_ve.py | 124 ++ .../schedulers/scheduling_lms_discrete.py | 133 ++ .../diffusers/schedulers/scheduling_pndm.py | 258 +++ .../diffusers/schedulers/scheduling_sde_ve.py | 172 ++ .../diffusers/schedulers/scheduling_sde_vp.py | 59 + .../diffusers/schedulers/scheduling_utils.py | 102 ++ .../stable_diffusion_img2img/module.py | 428 +++++ .../stable_diffusion_img2img/requirements.txt | 8 + .../stable_diffusion_img2img/utils.py | 16 + 32 files changed, 6399 insertions(+) create mode 100644 
modules/image/text_to_image/stable_diffusion_img2img/LICENSE create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/README.md create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/README.md create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/clip/layers.py create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/clip/model.py create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/configuration_utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/attention.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/embeddings.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/resnet.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d_condition.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_blocks.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/vae.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddim.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddpm.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_karras_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_lms_discrete.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_pndm.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_vp.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_utils.py create mode 100755 modules/image/text_to_image/stable_diffusion_img2img/module.py create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/requirements.txt create mode 100644 modules/image/text_to_image/stable_diffusion_img2img/utils.py diff --git a/modules/image/text_to_image/stable_diffusion_img2img/LICENSE b/modules/image/text_to_image/stable_diffusion_img2img/LICENSE new file mode 100644 index 000000000..928aa738f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/LICENSE @@ -0,0 +1,82 @@ +Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors + +CreativeML 
Open RAIL-M +dated August 22, 2022 + +Section I: PREAMBLE + +Multimodal generative models are being widely adopted and used, and have the potential to transform the way artists, among other individuals, conceive and benefit from AI or ML technologies as a tool for content creation. + +Notwithstanding the current and potential benefits that these artifacts can bring to society at large, there are also concerns about potential misuses of them, either due to their technical limitations or ethical considerations. + +In short, this license strives for both the open and responsible downstream use of the accompanying model. When it comes to the open character, we took inspiration from open source permissive licenses regarding the grant of IP rights. Referring to the downstream responsible use, we added use-based restrictions not permitting the use of the Model in very specific scenarios, in order for the licensor to be able to enforce the license in case potential misuses of the Model may occur. At the same time, we strive to promote open and responsible research on generative models for art and content generation. + +Even though downstream derivative versions of the model could be released under different licensing terms, the latter will always have to include - at minimum - the same use-based restrictions as the ones in the original license (this license). We believe in the intersection between open and responsible AI development; thus, this License aims to strike a balance between both in order to enable responsible open-science in the field of AI. + +This License governs the use of the model (and its derivatives) and is informed by the model card associated with the model. + +NOW THEREFORE, You and Licensor agree as follows: + +1. Definitions + +- "License" means the terms and conditions for use, reproduction, and Distribution as defined in this document. +- "Data" means a collection of information and/or content extracted from the dataset used with the Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not licensed under this License. +- "Output" means the results of operating a Model as embodied in informational content resulting therefrom. +- "Model" means any accompanying machine-learning based assemblies (including checkpoints), consisting of learnt weights, parameters (including optimizer states), corresponding to the model architecture as embodied in the Complementary Material, that have been trained or tuned, in whole or in part on the Data, using the Complementary Material. +- "Derivatives of the Model" means all modifications to the Model, works based on the Model, or any other model which is created or initialized by transfer of patterns of the weights, parameters, activations or output of the Model, to the other model, in order to cause the other model to perform similarly to the Model, including - but not limited to - distillation methods entailing the use of intermediate data representations or methods based on the generation of synthetic data by the Model for training the other model. +- "Complementary Material" means the accompanying source code and scripts used to define, run, load, benchmark or evaluate the Model, and used to prepare data for training or evaluation, if any. This includes any accompanying documentation, tutorials, examples, etc, if any. 
+- "Distribution" means any transmission, reproduction, publication or other sharing of the Model or Derivatives of the Model to a third party, including providing the Model as a hosted service made available by electronic or other remote means - e.g. API-based or web access. +- "Licensor" means the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model. +- "You" (or "Your") means an individual or Legal Entity exercising permissions granted by this License and/or making use of the Model for whichever purpose and in any field of use, including usage of the Model in an end-use application - e.g. chatbot, translator, image generator. +- "Third Parties" means individuals or legal entities that are not under common control with Licensor or You. +- "Contribution" means any work of authorship, including the original version of the Model and any modifications or additions to that Model or Derivatives of the Model thereof, that is intentionally submitted to Licensor for inclusion in the Model by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Model, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." +- "Contributor" means Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Model. + +Section II: INTELLECTUAL PROPERTY RIGHTS + +Both copyright and patent grants apply to the Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of the Model are subject to additional terms as described in Section III. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly display, publicly perform, sublicense, and distribute the Complementary Material, the Model, and Derivatives of the Model. +3. Grant of Patent License. Subject to the terms and conditions of this License and where and as applicable, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this paragraph) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Model and the Complementary Material, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Model to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution incorporated within the Model and/or Complementary Material constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for the Model and/or Work shall terminate as of the date such litigation is asserted or filed. + +Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION + +4. Distribution and Redistribution. You may host for Third Party remote access purposes (e.g. software-as-a-service), reproduce and distribute copies of the Model or Derivatives of the Model thereof in any medium, with or without modifications, provided that You meet the following conditions: +Use-based restrictions as referenced in paragraph 5 MUST be included as an enforceable provision by You in any type of legal agreement (e.g. a license) governing the use and/or distribution of the Model or Derivatives of the Model, and You shall give notice to subsequent users You Distribute to, that the Model or Derivatives of the Model are subject to paragraph 5. This provision does not apply to the use of Complementary Material. +You must give any Third Party recipients of the Model or Derivatives of the Model a copy of this License; +You must cause any modified files to carry prominent notices stating that You changed the files; +You must retain all copyright, patent, trademark, and attribution notices excluding those notices that do not pertain to any part of the Model, Derivatives of the Model. +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions - respecting paragraph 4.a. - for use, reproduction, or Distribution of Your modifications, or for any such Derivatives of the Model as a whole, provided Your use, reproduction, and Distribution of the Model otherwise complies with the conditions stated in this License. +5. Use-based restrictions. The restrictions set forth in Attachment A are considered Use-based restrictions. Therefore You cannot use the Model and the Derivatives of the Model for the specified restricted uses. You may use the Model subject to this License, including only for lawful purposes and in accordance with the License. Use may include creating any content with, finetuning, updating, running, training, evaluating and/or reparametrizing the Model. You shall require all of Your users who use the Model or a Derivative of the Model to comply with the terms of this paragraph (paragraph 5). +6. The Output You Generate. Except as set forth herein, Licensor claims no rights in the Output You generate using the Model. You are accountable for the Output you generate and its subsequent uses. No use of the output can contravene any provision as stated in the License. + +Section IV: OTHER PROVISIONS + +7. Updates and Runtime Restrictions. To the maximum extent permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage of the Model in violation of this License, update the Model through electronic means, or modify the Output of the Model based on updates. You shall undertake reasonable efforts to use the latest version of the Model. +8. Trademarks and related. 
Nothing in this License permits You to make use of Licensors’ trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between the parties; and any rights not expressly granted herein are reserved by the Licensors. +9. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Model and the Complementary Material (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Model, Derivatives of the Model, and the Complementary Material and assume any risks associated with Your exercise of permissions under this License. +10. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Model and the Complementary Material (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. +11. Accepting Warranty or Additional Liability. While redistributing the Model, Derivatives of the Model and the Complementary Material thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. +12. If any provision of this License is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein. 
+ +END OF TERMS AND CONDITIONS + + + + +Attachment A + +Use Restrictions + +You agree not to use the Model or Derivatives of the Model: +- In any way that violates any applicable national, federal, state, local or international law or regulation; +- For the purpose of exploiting, harming or attempting to exploit or harm minors in any way; +- To generate or disseminate verifiably false information and/or content with the purpose of harming others; +- To generate or disseminate personal identifiable information that can be used to harm an individual; +- To defame, disparage or otherwise harass others; +- For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation; +- For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics; +- To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm; +- For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories; +- To provide medical advice and medical results interpretation; +- To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use). \ No newline at end of file diff --git a/modules/image/text_to_image/stable_diffusion_img2img/README.md b/modules/image/text_to_image/stable_diffusion_img2img/README.md new file mode 100644 index 000000000..92d9fa328 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/README.md @@ -0,0 +1,171 @@ +# stable_diffusion_img2img + +|模型名称|stable_diffusion_img2img| +| :--- | :---: | +|类别|多模态-文图生成| +|网络|CLIP Text Encoder+UNet+VAD| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|4.0GB| +|最新更新日期|2022-08-26| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "A fantasy landscape, trending on artstation" + + - 输入初始图像 +
+    （输入初始图像示例）
+
+  - 输出图像
+    （输出图像示例）
+
+  - 生成过程
+    （生成过程动态图示例）
+ +### 模型介绍 + +Stable Diffusion是一种潜在扩散模型(Latent Diffusion), 属于生成类模型,这类模型通过对随机噪声进行一步步地迭代降噪并采样来获得感兴趣的图像,当前取得了令人惊艳的效果。相比于Disco Diffusion, Stable Diffusion通过在低纬度的潜在空间(lower dimensional latent space)而不是原像素空间来做迭代,极大地降低了内存和计算量的需求,并且在V100上一分钟之内即可以渲染出想要的图像,欢迎体验。该模块支持输入文本以及一个初始图像,对初始图像的内容进行改变。 + +更多详情请参考论文:[High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stable_diffusion_img2img + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run stable_diffusion_img2img --text_prompts "A fantasy landscape, trending on artstation" --init_image /PATH/TO/IMAGE --output_dir stable_diffusion_img2img_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="stable_diffusion_img2img") + text_prompts = ["A fantasy landscape, trending on artstation"] + # 生成图像, 默认会在stable_diffusion_img2img_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + # 您可以设置batch_size一次生成多张 + da = module.generate_image(text_prompts=text_prompts, batch_size=2, output_dir='./stable_diffusion_img2img_out/') + # 展示所有的中间结果 + da[0].chunks[-1].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks[-1].chunks.save_gif('stable_diffusion_img2img_out-merged-result.gif') + # da索引的是prompt, da[0].chunks索引的是该prompt下生成的第一张图,在batch_size不为1时能同时生成多张图 + # 您也可以按照上述操作显示单张图,如第0张的生成过程 + da[0].chunks[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + da[0].chunks[0].chunks.save_gif('stable_diffusion_img2img-image-0-result.gif') + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + init_image, + strength: float = 0.8, + width_height: Optional[List[int]] = [512, 512], + seed: Optional[int] = None, + batch_size: Optional[int] = 1, + display_rate: Optional[int] = 5, + output_dir: Optional[str] = 'stable_diffusion_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。 + - init_image(str|numpy.ndarray|PIL.Image): 输入的初始图像。 + - strength(float): 控制添加到输入图像的噪声强度,取值范围0到1。越接近1.0,图像变化越大。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - batch_size(Optional[int]): 指定每个prompt一次生成的图像的数量。 + - display_rate(Optional[int]): 保存中间结果的频率,默认每5个step保存一次中间结果,如果不需要中间结果来让程序跑的更快,可以将这个值设大。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"stable_diffusion_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m stable_diffusion_img2img + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()) + + # 发送HTTP请求 + data = {'text_prompts': 'A fantasy landscape, trending on artstation', 'init_image': cv2_to_base64(cv2.imread('/PATH/TO/IMAGE'))} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stable_diffusion_img2img" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + r.json()["results"] + da = DocumentArray.from_base64(r.json()["results"]) + # 保存结果图 + da[0].save_uri_to_file('stable_diffusion_img2img_out.png') + # 将生成过程保存为一个动态图gif + da[0].chunks[0].chunks.save_gif('stable_diffusion_img2img_out.gif') + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install stable_diffusion_img2img == 1.0.0 + ``` diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/README.md b/modules/image/text_to_image/stable_diffusion_img2img/clip/README.md new file mode 100755 index 000000000..9944794f8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We use this repo here for text encoder in stable diffusion. diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/__init__.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/layers.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = 
atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def 
forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/model.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/model.py new file mode 100755 index 000000000..06affcc4b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/model.py @@ -0,0 +1,259 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) 
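+        # x is now a single [batch_size, output_dim] embedding produced by the QKV attention pool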
+ + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class TextTransformer(nn.Layer): + + def __init__(self, context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, + transformer_layers: int): + super().__init__() + self.context_length = context_length + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def forward(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + 
self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. 
+    This is a significant percentage of your normal, say, 32K bpe vocab.
+    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+    And avoids mapping to whitespace/control characters the bpe code barfs on.
+    """
+    bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1))
+    cs = bs[:]
+    n = 0
+    for b in range(2**8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2**8 + n)
+            n += 1
+    cs = [chr(n) for n in cs]
+    return dict(zip(bs, cs))
+
+
+def get_pairs(word):
+    """Return set of symbol pairs in a word.
+    Word is represented as tuple of symbols (symbols being variable-length strings).
+    """
+    pairs = set()
+    prev_char = word[0]
+    for char in word[1:]:
+        pairs.add((prev_char, char))
+        prev_char = char
+    return pairs
+
+
+def basic_clean(text):
+    text = ftfy.fix_text(text)
+    text = html.unescape(html.unescape(text))
+    return text.strip()
+
+
+def whitespace_clean(text):
+    text = re.sub(r'\s+', ' ', text)
+    text = text.strip()
+    return text
+
+
+class SimpleTokenizer(object):
+
+    def __init__(self, bpe_path: str = default_bpe()):
+        self.byte_encoder = bytes_to_unicode()
+        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
+        merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
+        merges = merges[1:49152 - 256 - 2 + 1]
+        merges = [tuple(merge.split()) for merge in merges]
+        vocab = list(bytes_to_unicode().values())
+        vocab = vocab + [v + '</w>' for v in vocab]
+        for merge in merges:
+            vocab.append(''.join(merge))
+        vocab.extend(['<|startoftext|>', '<|endoftext|>'])
+        self.encoder = dict(zip(vocab, range(len(vocab))))
+        self.decoder = {v: k for k, v in self.encoder.items()}
+        self.bpe_ranks = dict(zip(merges, range(len(merges))))
+        self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'}
+        self.pat = re.compile(
+            r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
+            re.IGNORECASE)
+
+    def bpe(self, token):
+        if token in self.cache:
+            return self.cache[token]
+        word = tuple(token[:-1]) + (token[-1] + '</w>', )
+        pairs = get_pairs(word)
+
+        if not pairs:
+            return token + '</w>'
+
+        while True:
+            bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf')))
+            if bigram not in self.bpe_ranks:
+                break
+            first, second = bigram
+            new_word = []
+            i = 0
+            while i < len(word):
+                try:
+                    j = word.index(first, i)
+                    new_word.extend(word[i:j])
+                    i = j
+                except:
+                    new_word.extend(word[i:])
+                    break
+
+                if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
+                    new_word.append(first + second)
+                    i += 2
+                else:
+                    new_word.append(word[i])
+                    i += 1
+            new_word = tuple(new_word)
+            word = new_word
+            if len(word) == 1:
+                break
+            else:
+                pairs = get_pairs(word)
+        word = ' '.join(word)
+        self.cache[token] = word
+        return word
+
+    def encode(self, text):
+        bpe_tokens = []
+        text = whitespace_clean(basic_clean(text)).lower()
+        for token in re.findall(self.pat, text):
+            token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
+            bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' '))
+        return bpe_tokens
+
+    def decode(self, tokens):
+        text = ''.join([self.decoder[token] for token in tokens])
+        text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ')
+        return text
diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py new file mode 100755 index 
000000000..b5d417144 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py @@ -0,0 +1,88 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .model import TextTransformer +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['VITL14'] + +URL = {'VITL14': os.path.join(os.path.dirname(__file__), 'pre_trained', 'vitl14_textencoder.pdparams')} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='VITL14'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'VITL14': build_vitl14_language_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + state_dict = model.state_dict() + for key, value in sd.items(): + if key in state_dict: + state_dict[key] = value + model.load_dict(state_dict) + model.eval() + return model + + +def build_vitl14_language_model(): + model = TextTransformer(context_length=77, + vocab_size=49408, + transformer_width=768, + transformer_heads=12, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/__init__.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/__init__.py new file mode 100644 index 000000000..7f41816d7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.2.4" + +from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel + +from .schedulers import (DDIMScheduler, DDPMScheduler, KarrasVeScheduler, PNDMScheduler, SchedulerMixin, + ScoreSdeVeScheduler, LMSDiscreteScheduler) diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/configuration_utils.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/configuration_utils.py new file mode 100644 index 000000000..c90ebd5be --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/configuration_utils.py @@ -0,0 +1,312 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" ConfigMixinuration base class and utilities.""" +import functools +import inspect +import json +import os +import re +from collections import OrderedDict +from typing import Any +from typing import Dict +from typing import Tuple +from typing import Union + +from requests import HTTPError + +from paddlehub.common.logger import logger + +HUGGINGFACE_CO_RESOLVE_ENDPOINT = "HUGGINGFACE_CO_RESOLVE_ENDPOINT" +DIFFUSERS_CACHE = "./caches" + +_re_configuration_file = re.compile(r"config\.(.*)\.json") + + +class ConfigMixin: + r""" + Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as + methods for loading/downloading/saving configurations. + + """ + config_name = "model_config.json" + ignore_for_config = [] + + def register_to_config(self, **kwargs): + if self.config_name is None: + raise NotImplementedError(f"Make sure that {self.__class__} has defined a class name `config_name`") + kwargs["_class_name"] = self.__class__.__name__ + kwargs["_diffusers_version"] = "0.0.1" + + for key, value in kwargs.items(): + try: + setattr(self, key, value) + except AttributeError as err: + logger.error(f"Can't set {key} with value {value} for {self}") + raise err + + if not hasattr(self, "_internal_dict"): + internal_dict = kwargs + else: + previous_dict = dict(self._internal_dict) + internal_dict = {**self._internal_dict, **kwargs} + logger.debug(f"Updating config from {previous_dict} to {internal_dict}") + + self._internal_dict = FrozenDict(internal_dict) + + def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): + """ + Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the + [`~ConfigMixin.from_config`] class method. 
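+        The configuration is saved as `self.config_name` ("model_config.json" by default) inside `save_directory`.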
+ + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + kwargs: + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + os.makedirs(save_directory, exist_ok=True) + + # If we save using the predefined names, we can load using `from_config` + output_config_file = os.path.join(save_directory, self.config_name) + + self.to_json_file(output_config_file) + logger.info(f"ConfigMixinuration saved in {output_config_file}") + + @classmethod + def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + + model = cls(**init_dict) + + if return_unused_kwargs: + return model, unused_kwargs + else: + return model + + @classmethod + def get_config_dict(cls, pretrained_model_name_or_path: Union[str, os.PathLike], + **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + user_agent = {"file_type": "config"} + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if cls.config_name is None: + raise ValueError( + "`self.config_name` is not defined. Note that one should not load a config from " + "`ConfigMixin`. 
Please make sure to define `config_name` in a class inheriting from `ConfigMixin`") + + if os.path.isfile(pretrained_model_name_or_path): + config_file = pretrained_model_name_or_path + elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)): + # Load from a PyTorch checkpoint + config_file = os.path.join(pretrained_model_name_or_path, cls.config_name) + elif subfolder is not None and os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name)): + config_file = os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name) + else: + raise EnvironmentError( + f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}.") + else: + try: + # Load from URL or cache if already cached + from huggingface_hub import hf_hub_download + config_file = hf_hub_download( + pretrained_model_name_or_path, + filename=cls.config_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + ) + + except HTTPError as err: + raise EnvironmentError("There was a specific connection error when trying to load" + f" {pretrained_model_name_or_path}:\n{err}") + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to" + " run the library in offline mode at" + " 'https://huggingface.co/docs/diffusers/installation#offline-mode'.") + except EnvironmentError: + raise EnvironmentError( + f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a {cls.config_name} file") + + try: + # Load config dict + config_dict = cls._dict_from_json_file(config_file) + except (json.JSONDecodeError, UnicodeDecodeError): + raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.") + + return config_dict + + @classmethod + def extract_init_dict(cls, config_dict, **kwargs): + expected_keys = set(dict(inspect.signature(cls.__init__).parameters).keys()) + expected_keys.remove("self") + # remove general kwargs if present in dict + if "kwargs" in expected_keys: + expected_keys.remove("kwargs") + # remove keys to be ignored + if len(cls.ignore_for_config) > 0: + expected_keys = expected_keys - set(cls.ignore_for_config) + init_dict = {} + for key in expected_keys: + if key in kwargs: + # overwrite key + init_dict[key] = kwargs.pop(key) + elif key in config_dict: + # use value from config dict + init_dict[key] = config_dict.pop(key) + + unused_kwargs = config_dict.update(kwargs) + + passed_keys = set(init_dict.keys()) + if len(expected_keys - passed_keys) > 0: + logger.warning( + f"{expected_keys - passed_keys} was not found in config. 
Values will be initialized to default values.") + + return init_dict, unused_kwargs + + @classmethod + def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]): + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + return json.loads(text) + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + @property + def config(self) -> Dict[str, Any]: + return self._internal_dict + + def to_json_string(self) -> str: + """ + Serializes this instance to a JSON string. + + Returns: + `str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path: Union[str, os.PathLike]): + """ + Save this instance to a JSON file. + + Args: + json_file_path (`str` or `os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. + """ + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) + + +class FrozenDict(OrderedDict): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + for key, value in self.items(): + setattr(self, key, value) + + self.__frozen = True + + def __delitem__(self, *args, **kwargs): + raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") + + def setdefault(self, *args, **kwargs): + raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") + + def pop(self, *args, **kwargs): + raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") + + def update(self, *args, **kwargs): + raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") + + def __setattr__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setattr__(name, value) + + def __setitem__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setitem__(name, value) + + +def register_to_config(init): + """ + Decorator to apply on the init of classes inheriting from `ConfigMixin` so that all the arguments are automatically + sent to `self.register_for_config`. To ignore a specific argument accepted by the init but that shouldn't be + registered in the config, use the `ignore_for_config` class variable + + Warning: Once decorated, all private arguments (beginning with an underscore) are trashed and not sent to the init! + """ + + @functools.wraps(init) + def inner_init(self, *args, **kwargs): + # Ignore private kwargs in the init. 
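+        # arguments whose names start with an underscore are dropped here and never reach the wrapped __init__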
+ init_kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_")} + init(self, *args, **init_kwargs) + if not isinstance(self, ConfigMixin): + raise RuntimeError( + f"`@register_for_config` was applied to {self.__class__.__name__} init method, but this class does " + "not inherit from `ConfigMixin`.") + + ignore = getattr(self, "ignore_for_config", []) + # Get positional arguments aligned with kwargs + new_kwargs = {} + signature = inspect.signature(init) + parameters = { + name: p.default + for i, (name, p) in enumerate(signature.parameters.items()) if i > 0 and name not in ignore + } + for arg, name in zip(args, parameters.keys()): + new_kwargs[name] = arg + + # Then add all kwargs + new_kwargs.update({ + k: init_kwargs.get(k, default) + for k, default in parameters.items() if k not in ignore and k not in new_kwargs + }) + getattr(self, "register_to_config")(**new_kwargs) + + return inner_init diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/README.md b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/README.md new file mode 100644 index 000000000..e786fe518 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/README.md @@ -0,0 +1,11 @@ +# Models + +- Models: Neural network that models $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ (see image below) and is trained end-to-end to denoise a noisy input to an image. Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet + +## API + +TODO(Suraj, Patrick) + +## Examples + +TODO(Suraj, Patrick) diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/__init__.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/__init__.py new file mode 100644 index 000000000..f55cc88a8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/__init__.py @@ -0,0 +1,20 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .unet_2d import UNet2DModel +from .unet_2d_condition import UNet2DConditionModel +from .vae import AutoencoderKL +from .vae import VQModel diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/attention.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/attention.py new file mode 100644 index 000000000..29d0e73a7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/attention.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from inspect import isfunction + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def finfo(dtype): + if dtype == paddle.float32: + return np.finfo(np.float32) + if dtype == paddle.float16: + return np.finfo(np.float16) + if dtype == paddle.float64: + return np.finfo(np.float64) + + +paddle.finfo = finfo + + +class AttentionBlockNew(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. Originally ported from here, but adapted + to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + Uses three q, k, v linear layers to compute attention + """ + + def __init__( + self, + channels, + num_head_channels=None, + num_groups=32, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + + self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 + self.num_head_size = num_head_channels + self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + + # define q,k,v as linear layers + self.query = nn.Linear(channels, channels) + self.key = nn.Linear(channels, channels) + self.value = nn.Linear(channels, channels) + + self.rescale_output_factor = rescale_output_factor + self.proj_attn = nn.Linear(channels, channels) + + def transpose_for_scores(self, projection: paddle.Tensor) -> paddle.Tensor: + new_projection_shape = projection.shape[:-1] + [self.num_heads, -1] + # move heads to 2nd position (B, T, H * D) -> (B, T, H, D) -> (B, H, T, D) + new_projection = projection.reshape(new_projection_shape).transpose([0, 2, 1, 3]) + return new_projection + + def forward(self, hidden_states): + residual = hidden_states + batch, channel, height, width = hidden_states.shape + + # norm + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.reshape([batch, channel, height * width]).transpose([0, 2, 1]) + + # proj to q, k, v + query_proj = self.query(hidden_states) + key_proj = self.key(hidden_states) + value_proj = self.value(hidden_states) + + # transpose + query_states = self.transpose_for_scores(query_proj) + key_states = self.transpose_for_scores(key_proj) + value_states = self.transpose_for_scores(value_proj) + + # get scores + scale = 1 / math.sqrt(math.sqrt(self.channels / self.num_heads)) + attention_scores = paddle.matmul(query_states * scale, key_states * scale, transpose_y=True) + attention_probs = F.softmax(attention_scores.astype("float32"), axis=-1).astype(attention_scores.dtype) + + # compute attention output + context_states = paddle.matmul(attention_probs, value_states) + + context_states = context_states.transpose([0, 2, 1, 3]) + new_context_states_shape = context_states.shape[:-2] + [ + self.channels, + ] + context_states = context_states.reshape(new_context_states_shape) + + # compute next hidden_states + hidden_states = self.proj_attn(context_states) + hidden_states = hidden_states.transpose([0, 2, 1]).reshape([batch, channel, height, width]) + + # res connect and rescale + 
hidden_states = (hidden_states + residual) / self.rescale_output_factor + return hidden_states + + def set_weight(self, attn_layer): + self.group_norm.weight.set_value(attn_layer.norm.weight) + self.group_norm.bias.set_value(attn_layer.norm.bias) + + if hasattr(attn_layer, "q"): + self.query.weight.set_value(attn_layer.q.weight[:, :, 0, 0]) + self.key.weight.set_value(attn_layer.k.weight[:, :, 0, 0]) + self.value.weight.set_value(attn_layer.v.weight[:, :, 0, 0]) + + self.query.bias.set_value(attn_layer.q.bias) + self.key.bias.set_value(attn_layer.k.bias) + self.value.bias.set_value(attn_layer.v.bias) + + self.proj_attn.weight.set_value(attn_layer.proj_out.weight[:, :, 0, 0]) + self.proj_attn.bias.set_value(attn_layer.proj_out.bias) + elif hasattr(attn_layer, "NIN_0"): + self.query.weight.set_value(attn_layer.NIN_0.W.t()) + self.key.weight.set_value(attn_layer.NIN_1.W.t()) + self.value.weight.set_value(attn_layer.NIN_2.W.t()) + + self.query.bias.set_value(attn_layer.NIN_0.b) + self.key.bias.set_value(attn_layer.NIN_1.b) + self.value.bias.set_value(attn_layer.NIN_2.b) + + self.proj_attn.weight.set_value(attn_layer.NIN_3.W.t()) + self.proj_attn.bias.set_value(attn_layer.NIN_3.b) + + self.group_norm.weight.set_value(attn_layer.GroupNorm_0.weight) + self.group_norm.bias.set_value(attn_layer.GroupNorm_0.bias) + else: + qkv_weight = attn_layer.qkv.weight.reshape( + [self.num_heads, 3 * self.channels // self.num_heads, self.channels]) + qkv_bias = attn_layer.qkv.bias.reshape([self.num_heads, 3 * self.channels // self.num_heads]) + + q_w, k_w, v_w = qkv_weight.split(self.channels // self.num_heads, axis=1) + q_b, k_b, v_b = qkv_bias.split(self.channels // self.num_heads, axis=1) + + self.query.weight.set_value(q_w.reshape([-1, self.channels])) + self.key.weight.set_value(k_w.reshape([-1, self.channels])) + self.value.weight.set_value(v_w.reshape([-1, self.channels])) + + self.query.bias.set_value(q_b.flatten()) + self.key.bias.set_value(k_b.flatten()) + self.value.bias.set_value(v_b.flatten()) + + self.proj_attn.weight.set_value(attn_layer.proj.weight[:, :, 0]) + self.proj_attn.bias.set_value(attn_layer.proj.bias) + + +class SpatialTransformer(nn.Layer): + """ + Transformer block for image-like data. First, project the input (aka embedding) and reshape to b, t, d. Then apply + standard transformer action. 
Finally, reshape to image + """ + + def __init__(self, in_channels, n_heads, d_head, depth=1, dropout=0.0, context_dim=None): + super().__init__() + self.n_heads = n_heads + self.d_head = d_head + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, epsilon=1e-6) + + self.proj_in = nn.Conv2D(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + + self.transformer_blocks = nn.LayerList([ + BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim) + for d in range(depth) + ]) + + self.proj_out = nn.Conv2D(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + x = self.proj_in(x) + x = x.transpose([0, 2, 3, 1]).reshape([b, h * w, c]) + for block in self.transformer_blocks: + x = block(x, context=context) + x = x.reshape([b, h, w, c]).transpose([0, 3, 1, 2]) + x = self.proj_out(x) + return x + x_in + + def set_weight(self, layer): + self.norm = layer.norm + self.proj_in = layer.proj_in + self.transformer_blocks = layer.transformer_blocks + self.proj_out = layer.proj_out + + +class BasicTransformerBlock(nn.Layer): + + def __init__(self, dim, n_heads, d_head, dropout=0.0, context_dim=None, gated_ff=True, checkpoint=True): + super().__init__() + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, + dropout=dropout) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention(query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.checkpoint = checkpoint + + def forward(self, x, context=None): + x = self.attn1(self.norm1(x)) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class CrossAttention(nn.Layer): + + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + + self.scale = dim_head**-0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias_attr=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias_attr=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias_attr=False) + + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) + + def reshape_heads_to_batch_dim(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size, seq_len, head_size, dim // head_size]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size * head_size, seq_len, dim // head_size]) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size // head_size, head_size, seq_len, dim]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size // head_size, seq_len, dim * head_size]) + return tensor + + def forward(self, x, context=None, mask=None): + batch_size, sequence_length, dim = x.shape + + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + q = self.reshape_heads_to_batch_dim(q) + k = 
self.reshape_heads_to_batch_dim(k) + v = self.reshape_heads_to_batch_dim(v) + + sim = paddle.einsum("b i d, b j d -> b i j", q * self.scale, k) + + if exists(mask): + mask = mask.reshape([batch_size, -1]) + max_neg_value = -paddle.finfo(sim.dtype).max + mask = mask[:, None, :].repeat(h, 1, 1) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = F.softmax(sim, axis=-1) + + out = paddle.einsum("b i j, b j d -> b i d", attn, v) + out = self.reshape_batch_dim_to_heads(out) + return self.to_out(out) + + +class FeedForward(nn.Layer): + + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +# feedforward +class GEGLU(nn.Layer): + + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, axis=-1) + return x * F.gelu(gate) + + +# TODO(Patrick) - remove once all weights have been converted -> not needed anymore then +class NIN(nn.Layer): + + def __init__(self, in_dim, num_units, init_scale=0.1): + super().__init__() + self.W = self.create_parameter(shape=[in_dim, num_units], default_initializer=nn.initializer.Constant(0.)) + self.b = self.create_parameter(shape=[ + num_units, + ], + is_bias=True, + default_initializer=nn.initializer.Constant(0.)) + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# the main attention block that is used for all models +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
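+    When `encoder_channels` is set, key/value pairs projected from the encoder features are concatenated in,
+    so the spatial positions can also attend to external conditioning features.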
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=None, + num_groups=32, + encoder_channels=None, + overwrite_qkv=False, + overwrite_linear=False, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + if num_head_channels is None: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + self.qkv = nn.Conv1D(channels, channels * 3, 1) + self.n_heads = self.num_heads + self.rescale_output_factor = rescale_output_factor + + if encoder_channels is not None: + self.encoder_kv = nn.Conv1D(encoder_channels, channels * 2, 1) + + self.proj = nn.Conv1D(channels, channels, 1) + + self.overwrite_qkv = overwrite_qkv + self.overwrite_linear = overwrite_linear + + if overwrite_qkv: + in_channels = channels + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.q = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + elif self.overwrite_linear: + num_groups = min(channels // 4, 32) + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.NIN_0 = NIN(channels, channels) + self.NIN_1 = NIN(channels, channels) + self.NIN_2 = NIN(channels, channels) + self.NIN_3 = NIN(channels, channels) + + self.GroupNorm_0 = nn.GroupNorm(num_groups=num_groups, num_channels=channels, epsilon=1e-6) + else: + self.proj_out = nn.Conv1D(channels, channels, 1) + self.set_weights(self) + + self.is_overwritten = False + + def set_weights(self, layer): + if self.overwrite_qkv: + qkv_weight = paddle.concat([layer.q.weight, layer.k.weight, layer.v.weight], axis=0)[:, :, :, 0] + qkv_bias = paddle.concat([layer.q.bias, layer.k.bias, layer.v.bias], axis=0) + + self.qkv.weight.set_value(qkv_weight) + self.qkv.bias.set_value(qkv_bias) + + proj_out = nn.Conv1D(self.channels, self.channels, 1) + proj_out.weight.set_value(layer.proj_out.weight[:, :, :, 0]) + proj_out.bias.set_value(layer.proj_out.bias) + + self.proj = proj_out + elif self.overwrite_linear: + self.qkv.weight.set_value( + paddle.concat([self.NIN_0.W.t(), self.NIN_1.W.t(), self.NIN_2.W.t()], axis=0)[:, :, None]) + self.qkv.bias.set_value(paddle.concat([self.NIN_0.b, self.NIN_1.b, self.NIN_2.b], axis=0)) + + self.proj.weight.set_value(self.NIN_3.W.t()[:, :, None]) + self.proj.bias.set_value(self.NIN_3.b) + + self.norm.weight.set_value(self.GroupNorm_0.weight) + self.norm.bias.set_value(self.GroupNorm_0.bias) + else: + self.proj.weight.set_value(self.proj_out.weight) + self.proj.bias.set_value(self.proj_out.bias) + + def forward(self, x, encoder_out=None): + if not self.is_overwritten and (self.overwrite_qkv or self.overwrite_linear): + self.set_weights(self) + self.is_overwritten = True + + b, c, *spatial = x.shape + hid_states = self.norm(x).reshape([b, c, -1]) + + qkv = self.qkv(hid_states) + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape([bs * self.n_heads, ch * 3, length]).split(ch, axis=1) + + if encoder_out is not None: + 
encoder_kv = self.encoder_kv(encoder_out) + assert encoder_kv.shape[1] == self.n_heads * ch * 2 + ek, ev = encoder_kv.reshape([bs * self.n_heads, ch * 2, -1]).split(ch, axis=1) + k = paddle.concat([ek, k], axis=-1) + v = paddle.concat([ev, v], axis=-1) + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = F.softmax(weight.astype("float32"), axis=-1).astype(weight.dtype) + + a = paddle.einsum("bts,bcs->bct", weight, v) + h = a.reshape([bs, -1, length]) + + h = self.proj(h) + h = h.reshape([b, c, *spatial]) + + result = x + h + + result = result / self.rescale_output_factor + + return result diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/embeddings.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/embeddings.py new file mode 100644 index 000000000..3e826193b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/embeddings.py @@ -0,0 +1,116 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def get_timestep_embedding(timesteps, + embedding_dim, + flip_sin_to_cos=False, + downscale_freq_shift=1, + scale=1, + max_period=10000): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the + embeddings. :return: an [N x dim] Tensor of positional embeddings. 
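+    For example, with the defaults (flip_sin_to_cos=False, downscale_freq_shift=1, scale=1) and embedding_dim=4,
+    a timestep t maps to [sin(t), sin(t/10000), cos(t), cos(t/10000)].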
+ """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * paddle.arange(start=0, end=half_dim, dtype="float32") + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = paddle.exp(exponent) + emb = timesteps[:, None].astype("float32") * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = paddle.concat([paddle.sin(emb), paddle.cos(emb)], axis=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = paddle.concat([emb[:, half_dim:], emb[:, :half_dim]], axis=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = paddle.concat(emb, paddle.zeros([emb.shape[0], 1]), axis=-1) + return emb + + +class TimestepEmbedding(nn.Layer): + + def __init__(self, channel, time_embed_dim, act_fn="silu"): + super().__init__() + + self.linear_1 = nn.Linear(channel, time_embed_dim) + self.act = None + if act_fn == "silu": + self.act = nn.Silu() + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) + + def forward(self, sample): + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + return sample + + +class Timesteps(nn.Layer): + + def __init__(self, num_channels, flip_sin_to_cos, downscale_freq_shift): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + ) + return t_emb + + +class GaussianFourierProjection(nn.Layer): + """Gaussian Fourier embeddings for noise levels.""" + + def __init__(self, embedding_size=256, scale=1.0): + super().__init__() + self.register_buffer("weight", paddle.randn((embedding_size, )) * scale) + + # to delete later + self.register_buffer("W", paddle.randn((embedding_size, )) * scale) + + self.weight = self.W + + def forward(self, x): + x = paddle.log(x) + x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi + out = paddle.concat([paddle.sin(x_proj), paddle.cos(x_proj)], axis=-1) + return out diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/resnet.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/resnet.py new file mode 100644 index 000000000..944bc11cd --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/resnet.py @@ -0,0 +1,515 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def pad_new(x, pad, mode="constant", value=0): + new_pad = [] + for _ in range(x.ndim * 2 - len(pad)): + new_pad.append(0) + ndim = list(range(x.ndim - 1, 0, -1)) + axes_start = {} + for i, _pad in enumerate(pad): + if _pad < 0: + new_pad.append(0) + zhengshu, yushu = divmod(i, 2) + if yushu == 0: + axes_start[ndim[zhengshu]] = -_pad + else: + new_pad.append(_pad) + + padded = paddle.nn.functional.pad(x, new_pad, mode=mode, value=value) + padded_shape = paddle.shape(padded) + axes = [] + starts = [] + ends = [] + for k, v in axes_start.items(): + axes.append(k) + starts.append(v) + ends.append(padded_shape[k]) + assert v < padded_shape[k] + + if axes: + return padded.slice(axes=axes, starts=starts, ends=ends) + else: + return padded + + +class Upsample2D(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv_transpose: + conv = nn.Conv2DTranspose(channels, self.out_channels, 4, 2, 1) + elif use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, padding=1) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.conv = conv + else: + self.Conv2d_0 = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(x) + + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if self.use_conv: + if self.name == "conv": + x = self.conv(x) + else: + x = self.Conv2d_0(x) + + return x + + +class Downsample2D(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. 
+ """ + + def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2D(kernel_size=stride, stride=stride) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.Conv2d_0 = conv + self.conv = conv + elif name == "Conv2d_0": + self.conv = conv + else: + self.conv = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + x = pad_new(x, pad, mode="constant", value=0) + + assert x.shape[1] == self.channels + x = self.conv(x) + + return x + + +class FirUpsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.use_conv = use_conv + self.fir_kernel = fir_kernel + self.out_channels = out_channels + + def _upsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `upsample_2d()` followed by `Conv2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, + outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same datatype as + `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Setup filter kernel. + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + + if self.use_conv: + convH = w.shape[2] + convW = w.shape[3] + inC = w.shape[1] + + p = (k.shape[0] - factor) - (convW - 1) + + stride = (factor, factor) + # Determine data dimensions. + stride = [1, 1, factor, factor] + output_shape = ((x.shape[2] - 1) * factor + convH, (x.shape[3] - 1) * factor + convW) + output_padding = ( + output_shape[0] - (x.shape[2] - 1) * stride[0] - convH, + output_shape[1] - (x.shape[3] - 1) * stride[1] - convW, + ) + assert output_padding[0] >= 0 and output_padding[1] >= 0 + inC = w.shape[1] + num_groups = x.shape[1] // inC + + # Transpose weights. 
+ w = paddle.reshape(w, (num_groups, -1, inC, convH, convW)) + w = w[..., ::-1, ::-1].transpose([0, 2, 1, 3, 4]) + w = paddle.reshape(w, (num_groups * inC, -1, convH, convW)) + + x = F.conv2d_transpose(x, w, stride=stride, output_padding=output_padding, padding=0) + + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2 + factor - 1, p // 2 + 1)) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + h = self._upsample_2d(x, self.Conv2d_0.weight, k=self.fir_kernel) + h = h + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + h = self._upsample_2d(x, k=self.fir_kernel, factor=2) + + return h + + +class FirDownsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.fir_kernel = fir_kernel + self.use_conv = use_conv + self.out_channels = out_channels + + def _downsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `Conv2d()` followed by `downsample_2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. w: Weight tensor of the shape `[filterH, + filterW, inChannels, outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // + numGroups`. k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * + factor`, which corresponds to average pooling. factor: Integer downsampling factor (default: 2). gain: + Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or `[N, H // factor, W // factor, C]`, and same + datatype as `x`. 
+ """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + + if self.use_conv: + _, _, convH, convW = w.shape + p = (k.shape[0] - factor) + (convW - 1) + s = [factor, factor] + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2, p // 2)) + x = F.conv2d(x, w, stride=s, padding=0) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + x = self._downsample_2d(x, w=self.Conv2d_0.weight, k=self.fir_kernel) + x = x + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + x = self._downsample_2d(x, k=self.fir_kernel, factor=2) + + return x + + +class ResnetBlock(nn.Layer): + + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity="swish", + time_embedding_norm="default", + kernel=None, + output_scale_factor=1.0, + use_nin_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = nn.GroupNorm(num_groups=groups, num_channels=in_channels, epsilon=eps) + + self.conv1 = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = nn.GroupNorm(num_groups=groups_out, num_channels=out_channels, epsilon=eps) + self.dropout = nn.Dropout(dropout) + self.conv2 = nn.Conv2D(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.Silu() + + self.upsample = self.downsample = None + if self.up: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.upsample = lambda x: upsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.upsample = partial(F.interpolate, scale_factor=2.0, mode="nearest") + else: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.downsample = lambda x: downsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.downsample = partial(F.avg_pool2d, kernel_size=2, stride=2) + else: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_nin_shortcut = self.in_channels != self.out_channels if use_nin_shortcut is None else use_nin_shortcut + + self.conv_shortcut = None + if self.use_nin_shortcut: + self.conv_shortcut = nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, temb, hey=False): + h = x + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm1(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + if self.upsample is not None: + x = 
self.upsample(x) + h = self.upsample(h) + elif self.downsample is not None: + x = self.downsample(x) + h = self.downsample(h) + + h = self.conv1(h) + + if temb is not None: + temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None] + h = h + temb + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm2(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + h = self.dropout(h) + h = self.conv2(h) + + if self.conv_shortcut is not None: + x = self.conv_shortcut(x) + + out = (x + h) / self.output_scale_factor + + return out + + +class Mish(nn.Layer): + + def forward(self, x): + return x * F.tanh(F.softplus(x)) + + +def upsample_2d(x, k=None, factor=2, gain=1): + r"""Upsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given + filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified + `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is a: + multiple of the upsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` + """ + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + +def downsample_2d(x, k=None, factor=2, gain=1): + r"""Downsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and downsamples each image with the + given filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the + specified `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its + shape is a multiple of the downsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). 
+ + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + +def upfirdn2d_native(input, kernel, up=1, down=1, pad=(0, 0)): + up_x = up_y = up + down_x = down_y = down + pad_x0 = pad_y0 = pad[0] + pad_x1 = pad_y1 = pad[1] + + _, channel, in_h, in_w = input.shape + input = input.reshape([-1, in_h, in_w, 1]) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.reshape([-1, in_h, 1, in_w, 1, minor]) + # TODO + out = pad_new(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.reshape([-1, in_h * up_y, in_w * up_x, minor]) + + out = pad_new(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[:, max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.transpose([0, 3, 1, 2]) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = paddle.flip(kernel, [0, 1]).reshape([1, 1, kernel_h, kernel_w]) + out = F.conv2d(out, w) + out = out.reshape( + [-1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1]) + out = out.transpose([0, 2, 3, 1]) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.reshape([-1, channel, out_h, out_w]) diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d.py new file mode 100644 index 000000000..11316a819 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
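+# UNet2DModel below assembles the blocks from unet_blocks.py into the usual
+# encoder (down blocks) -> mid block -> decoder (up blocks) layout, with the
+# down-block activations kept as skip connections for the up blocks. A minimal,
+# illustrative call sketch (shapes assume this class's default config):
+#
+#     model = UNet2DModel(sample_size=64, in_channels=3, out_channels=3)
+#     noisy = paddle.randn([1, 3, 64, 64])
+#     pred = model(noisy, timestep=10)["sample"]   # same spatial shape as the input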
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import GaussianFourierProjection +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class UNet2DModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=None, + in_channels=3, + out_channels=3, + center_input_sample=False, + time_embedding_type="positional", + freq_shift=0, + flip_sin_to_cos=True, + down_block_types=("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D"), + block_out_channels=(224, 448, 672, 896), + layers_per_block=2, + mid_block_scale_factor=1, + downsample_padding=1, + act_fn="silu", + attention_head_dim=8, + norm_num_groups=32, + norm_eps=1e-5, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + if time_embedding_type == "fourier": + self.time_proj = GaussianFourierProjection(embedding_size=block_out_channels[0], scale=16) + timestep_input_dim = 2 * block_out_channels[0] + elif time_embedding_type == "positional": + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + ) + 
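+            # Register the block and remember its output width; the next (higher
+            # resolution) up block consumes it as `prev_output_channel` when sizing
+            # its first resnet.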
self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 32) + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=num_groups_out, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, sample: paddle.Tensor, timestep: Union[paddle.Tensor, float, int]) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + skip_sample = sample + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "skip_conv"): + sample, res_samples, skip_sample = downsample_block(hidden_states=sample, + temb=emb, + skip_sample=skip_sample) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb) + + # 5. up + skip_sample = None + for upsample_block in self.up_blocks: + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "skip_conv"): + sample, skip_sample = upsample_block(sample, res_samples, emb, skip_sample) + else: + sample = upsample_block(sample, res_samples, emb) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + if skip_sample is not None: + sample += skip_sample + + if self.config.time_embedding_type == "fourier": + timesteps = timesteps.reshape((sample.shape[0], *([1] * len(sample.shape[1:])))) + sample = sample / timesteps + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d_condition.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d_condition.py new file mode 100644 index 000000000..897491b2f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_2d_condition.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
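+# UNet2DConditionModel below is the text-conditioned UNet: the mid block and most
+# resolution levels use cross-attention against `encoder_hidden_states` (e.g. CLIP
+# text features). A minimal, illustrative call sketch with the default config
+# (latents [N, 4, 64, 64]; 77 tokens is only an example sequence length):
+#
+#     unet = UNet2DConditionModel()
+#     latents = paddle.randn([1, 4, 64, 64])
+#     text_emb = paddle.randn([1, 77, 768])
+#     noise_pred = unet(latents, timestep=50, encoder_hidden_states=text_emb)["sample"]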
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2DCrossAttn + + +class UNet2DConditionModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = 
nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=norm_num_groups, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward( + self, + sample: paddle.Tensor, + timestep: Union[paddle.Tensor, float, int], + encoder_hidden_states: paddle.Tensor, + ) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + + if hasattr(downsample_block, "attentions") and downsample_block.attentions is not None: + sample, res_samples = downsample_block(hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb, encoder_hidden_states=encoder_hidden_states) + + # 5. up + for upsample_block in self.up_blocks: + + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "attentions") and upsample_block.attentions is not None: + sample = upsample_block( + hidden_states=sample, + temb=emb, + res_hidden_states_tuple=res_samples, + encoder_hidden_states=encoder_hidden_states, + ) + else: + sample = upsample_block(hidden_states=sample, temb=emb, res_hidden_states_tuple=res_samples) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_blocks.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_blocks.py new file mode 100644 index 000000000..684a2a43d --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/unet_blocks.py @@ -0,0 +1,1428 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
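+# This module provides the building blocks used by the UNets above. get_down_block /
+# get_up_block are small factories that dispatch on the block-type strings stored in
+# the model configs; an illustrative call (values mirror the conditional UNet defaults):
+#
+#     block = get_down_block("CrossAttnDownBlock2D", num_layers=2, in_channels=320,
+#                            out_channels=640, temb_channels=1280, add_downsample=True,
+#                            resnet_eps=1e-5, resnet_act_fn="silu",
+#                            attn_num_head_channels=8, cross_attention_dim=768,
+#                            downsample_padding=1)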
+import numpy as np +import paddle +import paddle.nn as nn + +from .attention import AttentionBlockNew +from .attention import SpatialTransformer +from .resnet import Downsample2D +from .resnet import FirDownsample2D +from .resnet import FirUpsample2D +from .resnet import ResnetBlock +from .resnet import Upsample2D + + +def get_down_block( + down_block_type, + num_layers, + in_channels, + out_channels, + temb_channels, + add_downsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, + downsample_padding=None, +): + down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type + if down_block_type == "DownBlock2D": + return DownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnDownBlock2D": + return AttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "CrossAttnDownBlock2D": + if cross_attention_dim is None: + raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "SkipDownBlock2D": + return SkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnSkipDownBlock2D": + return AttnSkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "DownEncoderBlock2D": + return DownEncoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + + +def get_up_block( + up_block_type, + num_layers, + in_channels, + out_channels, + prev_output_channel, + temb_channels, + add_upsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, +): + up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type + if up_block_type == "UpBlock2D": + return UpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "CrossAttnUpBlock2D": + if cross_attention_dim is None: 
+ raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "AttnUpBlock2D": + return AttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "SkipUpBlock2D": + return SkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "AttnSkipUpBlock2D": + return AttnSkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "UpDecoderBlock2D": + return UpDecoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + raise ValueError(f"{up_block_type} does not exist.") + + +class UNetMidBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + AttentionBlockNew( + in_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in 
zip(self.attentions, self.resnets[1:]): + if self.attention_type == "default": + hidden_states = attn(hidden_states) + else: + hidden_states = attn(hidden_states, encoder_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class UNetMidBlock2DCrossAttn(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + cross_attention_dim=1280, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + SpatialTransformer( + in_channels, + attn_num_head_channels, + in_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states, encoder_hidden_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class AttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, 
+ out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class CrossAttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + 
Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnDownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.downsamplers is not None: + for downsampler 
in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnSkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class SkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, 
+ )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class AttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attention_type="default", + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class CrossAttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + 
resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, encoder_hidden_states=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet in self.resnets: + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if 
self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnUpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnSkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + upsample_padding=1, + add_upsample=True, + ): + 
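+        # Descriptive note: this block mirrors AttnSkipDownBlock2D on the decoder side. Its forward
+        # pass pops skip activations from `res_hidden_states_tuple` (deepest first), concatenates them
+        # with `hidden_states` along the channel axis, and carries an image-space `skip_sample` that is
+        # FIR-upsampled and refreshed through skip_norm -> act -> skip_conv.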
super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(resnet_in_channels + res_skip_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + hidden_states = self.attentions[0](hidden_states) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample + + +class SkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_upsample=True, + upsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + 
groups=min((resnet_in_channels + res_skip_channels) // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/vae.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/vae.py new file mode 100644 index 000000000..59e35b0fb --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/models/vae.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
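+# Illustrative usage sketch (hedged; not exercised anywhere in this file): how the classes defined
+# below are typically combined. `img` stands for a hypothetical NCHW float tensor in [-1, 1].
+#
+#   vae = AutoencoderKL(latent_channels=4, sample_size=512)
+#   posterior = vae.encode(img)   # DiagonalGaussianDistribution over the latents
+#   z = posterior.sample()        # or posterior.mode() for a deterministic latent
+#   recon = vae.decode(z)         # back to image space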
+import numpy as np +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class Encoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + double_z=True, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.down_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=self.layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=not is_final_block, + resnet_eps=1e-6, + downsample_padding=0, + resnet_act_fn=act_fn, + attn_num_head_channels=None, + temb_channels=None, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[-1], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = nn.Conv2D(block_out_channels[-1], conv_out_channels, 3, padding=1) + + def forward(self, x): + sample = x + sample = self.conv_in(sample) + + # down + for down_block in self.down_blocks: + sample = down_block(sample) + + # middle + sample = self.mid_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class Decoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + prev_output_channel=None, + add_upsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + 
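+                # the VAE decoder up blocks use neither attention heads nor a time embedding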
attn_num_head_channels=None, + temb_channels=None, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, z): + sample = z + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample) + + # up + for up_block in self.up_blocks: + sample = up_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class VectorQuantizer(nn.Layer): + """ + Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix + multiplications and allows for post-hoc remapping of indices. + """ + + # NOTE: due to a bug the beta term was applied to the wrong term. for + # backwards compatibility we use the buggy version by default, but you can + # specify legacy=False to fix it. + def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True): + super().__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + self.legacy = legacy + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + self.remap = remap + if self.remap is not None: + self.register_buffer("used", paddle.to_tensor(np.load(self.remap))) + self.re_embed = self.used.shape[0] + self.unknown_index = unknown_index # "random" or "extra" or integer + if self.unknown_index == "extra": + self.unknown_index = self.re_embed + self.re_embed = self.re_embed + 1 + print(f"Remapping {self.n_e} indices to {self.re_embed} indices. 
" + f"Using {self.unknown_index} for unknown indices.") + else: + self.re_embed = n_e + + self.sane_index_shape = sane_index_shape + + def remap_to_used(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + match = (inds[:, :, None] == used[None, None, ...]).astype("int64") + new = match.argmax(-1) + unknown = match.sum(2) < 1 + if self.unknown_index == "random": + new[unknown] = paddle.randint(0, self.re_embed, shape=new[unknown].shape) + else: + new[unknown] = self.unknown_index + return new.reshape(ishape) + + def unmap_to_all(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + if self.re_embed > self.used.shape[0]: # extra token + inds[inds >= self.used.shape[0]] = 0 # simply set to zero + back = paddle.gather(used[None, :][inds.shape[0] * [0], :], inds, axis=1) + return back.reshape(ishape) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.transpose([0, 2, 3, 1]) + z_flattened = z.reshape([-1, self.e_dim]) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = (paddle.sum(z_flattened**2, axis=1, keepdim=True) + paddle.sum(self.embedding.weight**2, axis=1) - + 2 * paddle.einsum("bd,dn->bn", z_flattened, self.embedding.weight.t())) + + min_encoding_indices = paddle.argmin(d, axis=1) + z_q = self.embedding(min_encoding_indices).reshape(z.shape) + perplexity = None + min_encodings = None + + # compute loss for embedding + if not self.legacy: + loss = self.beta * paddle.mean((z_q.detach() - z)**2) + paddle.mean((z_q - z.detach())**2) + else: + loss = paddle.mean((z_q.detach() - z)**2) + self.beta * paddle.mean((z_q - z.detach())**2) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + if self.remap is not None: + min_encoding_indices = min_encoding_indices.reshape([z.shape[0], -1]) # add batch axis + min_encoding_indices = self.remap_to_used(min_encoding_indices) + min_encoding_indices = min_encoding_indices.reshape([-1, 1]) # flatten + + if self.sane_index_shape: + min_encoding_indices = min_encoding_indices.reshape([z_q.shape[0], z_q.shape[2], z_q.shape[3]]) + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + if self.remap is not None: + indices = indices.reshape([shape[0], -1]) # add batch axis + indices = self.unmap_to_all(indices) + indices = indices.flatten() # flatten again + + # get quantized latent vectors + z_q = self.embedding(indices) + + if shape is not None: + z_q = z_q.reshape(shape) + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + return z_q + + +class DiagonalGaussianDistribution(object): + + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = paddle.chunk(parameters, 2, axis=1) + self.logvar = paddle.clip(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = paddle.exp(0.5 * self.logvar) + self.var = paddle.exp(self.logvar) + if self.deterministic: + self.var = self.std = paddle.zeros_like(self.mean) + + def sample(self): + x = self.mean + self.std * paddle.randn(self.mean.shape) + return x + + def kl(self, other=None): + if self.deterministic: + return paddle.to_tensor([0.0]) + else: + if other is None: + return 0.5 * 
paddle.sum(paddle.pow(self.mean, 2) + self.var - 1.0 - self.logvar, axis=[1, 2, 3]) + else: + return 0.5 * paddle.sum( + paddle.pow(self.mean - other.mean, 2) / other.var + self.var / other.var - 1.0 - self.logvar + + other.logvar, + axis=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return paddle.to_tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * paddle.sum(logtwopi + self.logvar + paddle.pow(sample - self.mean, 2) / self.var, axis=dims) + + def mode(self): + return self.mean + + +class VQModel(ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=1, + act_fn="silu", + latent_channels=3, + sample_size=32, + num_vq_embeddings=256, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=False, + ) + + self.quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + self.quantize = VectorQuantizer(num_vq_embeddings, + latent_channels, + beta=0.25, + remap=None, + sane_index_shape=False) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def forward(self, sample): + x = sample + h = self.encode(x) + dec = self.decode(h) + return dec + + +class AutoencoderKL(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=True, + ) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + self.quant_conv = nn.Conv2D(2 * latent_channels, 2 * latent_channels, 1) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, 
sample, sample_posterior=False):
+        x = sample
+        posterior = self.encode(x)
+        if sample_posterior:
+            z = posterior.sample()
+        else:
+            z = posterior.mode()
+        dec = self.decode(z)
+        return dec
diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/README.md b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/README.md
new file mode 100644
index 000000000..40f50f232
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/README.md
@@ -0,0 +1,18 @@
+# Schedulers
+
+- Schedulers implement the algorithms used to run diffusion models, at inference time as well as during training. They include the noise schedules and define algorithm-specific diffusion steps.
+- Schedulers can be used interchangeably across diffusion models at inference time to find the preferred trade-off between speed and generation quality.
+- Schedulers are available in numpy, but can easily be transformed into Paddle tensors.
+
+## API
+
+- Schedulers should provide one or more `def step(...)` functions that should be called iteratively to unroll the diffusion loop during
+the forward pass.
+- Schedulers should be framework-agnostic, but provide a simple functionality to convert the scheduler into a specific framework, such as Paddle,
+with a `set_format(...)` method.
+
+## Examples
+
+- The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py).
+- The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py).
+- The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py).
diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/__init__.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/__init__.py
new file mode 100644
index 000000000..cebc3e618
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/__init__.py
@@ -0,0 +1,24 @@
+# flake8: noqa
+# There's no way to ignore "F401 '...' imported but unused" warnings in this
+# module, but to preserve other warnings. So, don't check this module at all.
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .scheduling_ddim import DDIMScheduler +from .scheduling_ddpm import DDPMScheduler +from .scheduling_karras_ve import KarrasVeScheduler +from .scheduling_lms_discrete import LMSDiscreteScheduler +from .scheduling_pndm import PNDMScheduler +from .scheduling_sde_ve import ScoreSdeVeScheduler +from .scheduling_sde_vp import ScoreSdeVpScheduler +from .scheduling_utils import SchedulerMixin diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddim.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddim.py new file mode 100644 index 000000000..ebe362d99 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddim.py @@ -0,0 +1,182 @@ +# Copyright 2022 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pypaddle_diffusion +# and https://github.com/hojonathanho/diffusion +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
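+
+    Concretely, the helper below uses alpha_bar(t) = cos((t + 0.008) / 1.008 * pi / 2) ** 2, the
+    squared-cosine ("squaredcos_cap_v2") schedule, and clips each resulting beta at max_beta.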
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDIMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + clip_sample=True, + set_alpha_to_one=True, + tensor_format="pd", + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this paratemer simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = np.array(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def _get_variance(self, timestep, prev_timestep): + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.timesteps += offset + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator=None, + ): + # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointingc to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. 
compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + + # 4. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance**(0.5) + + if use_clipped_model_output: + # the model_output is always re-derived from the clipped x_0 in Glide + model_output = (sample - alpha_prod_t**(0.5) * pred_original_sample) / beta_prod_t**(0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2)**(0.5) * model_output + + # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + prev_sample = alpha_prod_t_prev**(0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + noise = paddle.randn(model_output.shape) + variance = self._get_variance(timestep, prev_timestep)**(0.5) * eta * noise + + if not paddle.is_tensor(model_output): + variance = variance.numpy() + + prev_sample = prev_sample + variance + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddpm.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddpm.py new file mode 100644 index 000000000..34551b2ad --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_ddpm.py @@ -0,0 +1,191 @@ +# Copyright 2022 UC Berkely Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
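+# Illustrative sketch (hedged): a minimal ancestral-sampling loop driven by the DDPMScheduler
+# defined below. `unet` and `sample` are hypothetical stand-ins; the scheduler only converts a
+# predicted noise into the previous, slightly less noisy sample.
+#
+#   scheduler = DDPMScheduler(num_train_timesteps=1000, tensor_format="pd")
+#   scheduler.set_timesteps(50)
+#   for t in scheduler.timesteps:
+#       noise_pred = unet(sample, t)                              # hypothetical noise-prediction model
+#       sample = scheduler.step(noise_pred, t, sample)["prev_sample"]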
+# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDPMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + variance_type="fixed_small", + clip_sample=True, + tensor_format="pd", + ): + + if trained_betas is not None: + self.betas = np.asarray(trained_betas) + elif beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
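+            # i.e. interpolate linearly between sqrt(beta_start) and sqrt(beta_end), then square;
+            # this is the "scaled_linear" schedule used by latent-diffusion checkpoints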
+ self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + self.one = np.array(1.0) + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + self.variance_type = variance_type + + def set_timesteps(self, num_inference_steps): + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.set_format(tensor_format=self.tensor_format) + + def _get_variance(self, t, predicted_variance=None, variance_type=None): + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probs added for training stability + if variance_type == "fixed_small": + variance = self.clip(variance, min_value=1e-20) + # for rl-diffuser https://arxiv.org/abs/2205.09991 + elif variance_type == "fixed_small_log": + variance = self.log(self.clip(variance, min_value=1e-20)) + elif variance_type == "fixed_large": + variance = self.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = self.log(self.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = self.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + predict_epsilon=True, + generator=None, + ): + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + model_output, predicted_variance = paddle.split(model_output, sample.shape[1], axis=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + if predict_epsilon: + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + else: + pred_original_sample = model_output + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 4. 
Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample_coeff = (alpha_prod_t_prev**(0.5) * self.betas[t]) / beta_prod_t + current_sample_coeff = self.alphas[t]**(0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + noise = self.randn_like(model_output) + variance = (self._get_variance(t, predicted_variance=predicted_variance)**0.5) * noise + + pred_prev_sample = pred_prev_sample + variance + + return {"prev_sample": pred_prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_karras_ve.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_karras_ve.py new file mode 100644 index 000000000..36827564e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_karras_ve.py @@ -0,0 +1,124 @@ +# Copyright 2022 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class KarrasVeScheduler(SchedulerMixin, ConfigMixin): + """ + Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and + the VE column of Table 1 from [1] for reference. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic + differential equations." https://arxiv.org/abs/2011.13456 + """ + + @register_to_config + def __init__( + self, + sigma_min=0.02, + sigma_max=100, + s_noise=1.007, + s_churn=80, + s_min=0.05, + s_max=50, + tensor_format="pd", + ): + """ + For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of + Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the + optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper. 
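+        At each step, noise is first added to raise the state from sigma_i to
+        sigma_hat = sigma_i + gamma_i * sigma_i (see `add_noise_to_input`), after which an
+        Euler-style step moves the sample from sigma_hat down to sigma_{i+1}.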
+ + Args: + sigma_min (`float`): minimum noise magnitude + sigma_max (`float`): maximum noise magnitude + s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling. + A reasonable range is [1.000, 1.011]. + s_churn (`float`): the parameter controlling the overall amount of stochasticity. + A reasonable range is [0, 100]. + s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity). + A reasonable range is [0, 10]. + s_max (`float`): the end value of the sigma range where we add noise. + A reasonable range is [0.2, 80]. + """ + # setable values + self.num_inference_steps = None + self.timesteps = None + self.schedule = None # sigma(t_i) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.schedule = [(self.sigma_max * (self.sigma_min**2 / self.sigma_max**2)**(i / (num_inference_steps - 1))) + for i in self.timesteps] + self.schedule = np.array(self.schedule, dtype=np.float32) + + self.set_format(tensor_format=self.tensor_format) + + def add_noise_to_input(self, sample, sigma, generator=None): + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a + higher noise level sigma_hat = sigma_i + gamma_i*sigma_i. + """ + if self.s_min <= sigma <= self.s_max: + gamma = min(self.s_churn / self.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + eps = self.s_noise * paddle.randn(sample.shape) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2)**0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + return {"prev_sample": sample_prev, "derivative": derivative} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + sample_prev: Union[paddle.Tensor, np.ndarray], + derivative: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + return {"prev_sample": sample_prev, "derivative": derivative_corr} + + def add_noise(self, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_lms_discrete.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_lms_discrete.py new file mode 100644 index 000000000..2ed63cc2c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_lms_discrete.py @@ -0,0 +1,133 @@ +# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle +from scipy import integrate + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + tensor_format="pd", + ): + """ + Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by + Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181 + """ + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5 + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self.derivatives = [] + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def get_lms_coefficient(self, order, t, current_order): + """ + Compute a linear multistep coefficient + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.linspace(self.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) + + low_idx = np.floor(self.timesteps).astype(int) + high_idx = np.ceil(self.timesteps).astype(int) + frac = np.mod(self.timesteps, 1.0) + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5) + sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] + self.sigmas = np.concatenate([sigmas, [0.0]]) + + self.derivatives = [] + + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + order: int = 4, + ): + sigma = self.sigmas[timestep] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + pred_original_sample = sample - sigma * model_output + + # 2. 
Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + self.derivatives.append(derivative) + if len(self.derivatives) > order: + self.derivatives.pop(0) + + # 3. Compute linear multistep coefficients + order = min(timestep + 1, order) + lms_coeffs = [self.get_lms_coefficient(order, timestep, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum(coeff * derivative + for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives))) + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + alpha_prod = self.alphas_cumprod[timesteps] + alpha_prod = self.match_shape(alpha_prod, original_samples) + + noisy_samples = (alpha_prod**0.5) * original_samples + ((1 - alpha_prod)**0.5) * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_pndm.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_pndm.py new file mode 100644 index 000000000..12abd9cfe --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_pndm.py @@ -0,0 +1,258 @@ +# Copyright 2022 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
+    """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class PNDMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + tensor_format="pd", + skip_prk_steps=False, + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.one = np.array(1.0) + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.cur_model_output = 0 + self.counter = 0 + self.cur_sample = None + self.ets = [] + + # setable values + self.num_inference_steps = None + self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self._offset = 0 + self.prk_timesteps = None + self.plms_timesteps = None + self.timesteps = None + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self._timesteps = list( + range(0, self.config.num_train_timesteps, self.config.num_train_timesteps // num_inference_steps)) + self._offset = offset + self._timesteps = [t + self._offset for t in self._timesteps] + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results.
When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + self.prk_timesteps = [] + self.plms_timesteps = list(reversed(self._timesteps[:-1] + self._timesteps[-2:-1] + self._timesteps[-1:])) + else: + prk_timesteps = np.array(self._timesteps[-self.pndm_order:]).repeat(2) + np.tile( + np.array([0, self.config.num_train_timesteps // num_inference_steps // 2]), self.pndm_order) + self.prk_timesteps = list(reversed(prk_timesteps[:-1].repeat(2)[1:-1])) + self.plms_timesteps = list(reversed(self._timesteps[:-3])) + + self.timesteps = self.prk_timesteps + self.plms_timesteps + + self.ets = [] + self.counter = 0 + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps: + return self.step_prk(model_output=model_output, timestep=timestep, sample=sample) + else: + return self.step_plms(model_output=model_output, timestep=timestep, sample=sample) + + def step_prk( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the + solution to the differential equation. + """ + diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2 + prev_timestep = max(timestep - diff_to_prev, self.prk_timesteps[-1]) + timestep = self.prk_timesteps[self.counter // 4 * 4] + + if self.counter % 4 == 0: + self.cur_model_output += 1 / 6 * model_output + self.ets.append(model_output) + self.cur_sample = sample + elif (self.counter - 1) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 2) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 3) % 4 == 0: + model_output = self.cur_model_output + 1 / 6 * model_output + self.cur_model_output = 0 + + # cur_sample should not be `None` + cur_sample = self.cur_sample if self.cur_sample is not None else sample + + prev_sample = self._get_prev_sample(cur_sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def step_plms( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple + times to approximate the solution. 
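+        Concretely, once four previous noise predictions are cached in `self.ets`, the update below uses the fourth-order Adams-Bashforth combination (55 * e_t - 59 * e_{t-1} + 37 * e_{t-2} - 9 * e_{t-3}) / 24, so each step needs only one new network evaluation.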
+        """ + if not self.config.skip_prk_steps and len(self.ets) < 3: + raise ValueError( + f"{self.__class__} can only be run AFTER scheduler has been run " + "in 'prk' mode for at least 12 iterations " + "See: https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py " + "for more information.") + + prev_timestep = max(timestep - self.config.num_train_timesteps // self.num_inference_steps, 0) + + if self.counter != 1: + self.ets.append(model_output) + else: + prev_timestep = timestep + timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps + + if len(self.ets) == 1 and self.counter == 0: + model_output = model_output + self.cur_sample = sample + elif len(self.ets) == 1 and self.counter == 1: + model_output = (model_output + self.ets[-1]) / 2 + sample = self.cur_sample + self.cur_sample = None + elif len(self.ets) == 2: + model_output = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + model_output = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + model_output = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def _get_prev_sample(self, sample, timestep, timestep_prev, model_output): + # See formula (9) of PNDM paper https://arxiv.org/pdf/2202.09778.pdf + # this function computes x_(t−δ) using the formula of (9) + # Note that x_t needs to be added to both sides of the equation + + # Notation (<variable name> -> <name in paper>) + # alpha_prod_t -> α_t + # alpha_prod_t_prev -> α_(t−δ) + # beta_prod_t -> (1 - α_t) + # beta_prod_t_prev -> (1 - α_(t−δ)) + # sample -> x_t + # model_output -> e_θ(x_t, t) + # prev_sample -> x_(t−δ) + alpha_prod_t = self.alphas_cumprod[timestep + 1 - self._offset] + alpha_prod_t_prev = self.alphas_cumprod[timestep_prev + 1 - self._offset] + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqrt(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t)**(0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev**(0.5) + (alpha_prod_t * beta_prod_t * + alpha_prod_t_prev)**(0.5) + + # full formula (9) + prev_sample = (sample_coeff * sample - + (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff) + + return prev_sample + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_ve.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_ve.py new file mode 100644 index 000000000..92ca23d5b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_ve.py @@ -0,0
+1,172 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch +# TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): + """ + The variance exploding stochastic differential equation (SDE) scheduler. + + :param snr: coefficient weighting the step from the model_output sample (from the network) to the random noise. + :param sigma_min: initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the + distribution of the data. + :param sigma_max: + :param sampling_eps: the end value of sampling, where timesteps decrease progressively from 1 to epsilon. + :param correct_steps: number of correction steps performed on a produced sample. + :param tensor_format: "np" or "pd" for the expected format of samples passed to the Scheduler. + """ + + @register_to_config + def __init__( + self, + num_train_timesteps=2000, + snr=0.15, + sigma_min=0.01, + sigma_max=1348, + sampling_eps=1e-5, + correct_steps=1, + tensor_format="pd", + ): + # self.sigmas = None + # self.discrete_sigmas = None + # + # # setable values + # self.num_inference_steps = None + self.timesteps = None + + self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, sampling_eps=None): + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.timesteps = np.linspace(1, sampling_eps, num_inference_steps) + elif tensor_format == "pd": + self.timesteps = paddle.linspace(1, sampling_eps, num_inference_steps) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_sigmas(self, num_inference_steps, sigma_min=None, sigma_max=None, sampling_eps=None): + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if self.timesteps is None: + self.set_timesteps(num_inference_steps, sampling_eps) + + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.discrete_sigmas = np.exp(np.linspace(np.log(sigma_min), np.log(sigma_max), num_inference_steps)) + self.sigmas = np.array([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + elif tensor_format == "pd": + self.discrete_sigmas = paddle.exp(paddle.linspace(np.log(sigma_min),
np.log(sigma_max), + num_inference_steps)) + self.sigmas = paddle.to_tensor([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def get_adjacent_sigma(self, timesteps, t): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.where(timesteps == 0, np.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + elif tensor_format == "pd": + return paddle.where(timesteps == 0, paddle.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_seed(self, seed): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + np.random.seed(seed) + elif tensor_format == "pd": + paddle.seed(seed) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def step_pred( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Predict the sample at the previous timestep by reversing the SDE. + """ + if seed is not None: + self.set_seed(seed) + # TODO(Patrick) non-Pypaddle + + timestep = timestep * paddle.ones(sample.shape[0]) # paddle.repeat_interleave(timestep, sample.shape[0]) + timesteps = (timestep * (len(self.timesteps) - 1)).astype("int64") + + sigma = self.discrete_sigmas[timesteps] + adjacent_sigma = self.get_adjacent_sigma(timesteps, timestep) + drift = self.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2)**0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + drift = drift - diffusion[:, None, None, None]**2 * model_output + + # equation 6: sample noise for the diffusion term of + noise = self.randn_like(sample) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion[:, None, None, None] * noise # add impact of diffusion field g + + return {"prev_sample": prev_sample, "prev_sample_mean": prev_sample_mean} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Correct the predicted sample based on the output model_output of the network. This is often run repeatedly + after making the prediction for the previous timestep. + """ + if seed is not None: + self.set_seed(seed) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. 
of z" + # sample noise for correction + noise = self.randn_like(sample) + + # compute step size from the model_output, the noise, and the snr + grad_norm = self.norm(model_output) + noise_norm = self.norm(noise) + step_size = (self.config.snr * noise_norm / grad_norm)**2 * 2 + step_size = step_size * paddle.ones(sample.shape[0]) + # self.repeat_scalar(step_size, sample.shape[0]) + + # compute corrected sample: model_output term and noise term + prev_sample_mean = sample + step_size[:, None, None, None] * model_output + prev_sample = prev_sample_mean + ((step_size * 2)**0.5)[:, None, None, None] * noise + + return {"prev_sample": prev_sample} + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_vp.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_vp.py new file mode 100644 index 000000000..8ad84c73e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_sde_vp.py @@ -0,0 +1,59 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch +# TODO(Patrick, Anton, Suraj) - make scheduler framework independent and clean-up a bit +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): + + self.sigmas = None + self.discrete_sigmas = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps): + self.timesteps = paddle.linspace(1, self.config.sampling_eps, num_inference_steps) + + def step_pred(self, score, x, t): + # TODO(Patrick) better comments + non-PyTorch + # postprocess model score + log_mean_coeff = (-0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min) + std = paddle.sqrt(1.0 - paddle.exp(2.0 * log_mean_coeff)) + score = -score / std[:, None, None, None] + + # compute + dt = -1.0 / len(self.timesteps) + + beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) + drift = -0.5 * beta_t[:, None, None, None] * x + diffusion = paddle.sqrt(beta_t) + drift = drift - diffusion[:, None, None, None]**2 * score + x_mean = x + drift * dt + + # add noise + noise = self.randn_like(x) + x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise + + return x, x_mean + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_utils.py b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_utils.py new
file mode 100644 index 000000000..dc3cbde5a --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/diffusers/schedulers/scheduling_utils.py @@ -0,0 +1,102 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +class SchedulerMixin: + + config_name = SCHEDULER_CONFIG_NAME + ignore_for_config = ["tensor_format"] + + def set_format(self, tensor_format="pd"): + self.tensor_format = tensor_format + if tensor_format == "pd": + for key, value in vars(self).items(): + if isinstance(value, np.ndarray): + setattr(self, key, paddle.to_tensor(value)) + + return self + + def clip(self, tensor, min_value=None, max_value=None): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.clip(tensor, min_value, max_value) + elif tensor_format == "pd": + return paddle.clip(tensor, min_value, max_value) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def log(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.log(tensor) + elif tensor_format == "pd": + return paddle.log(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def match_shape(self, values: Union[np.ndarray, paddle.Tensor], broadcast_array: Union[np.ndarray, paddle.Tensor]): + """ + Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. + + Args: + values: an array or tensor of values to extract. + broadcast_array: an array with a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + Returns: + a tensor of shape [batch_size, 1, ...] where the shape has K dims. 
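+            For example, values of shape [batch_size] matched against a broadcast_array of shape [batch_size, C, H, W] are returned with shape [batch_size, 1, 1, 1].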
+        """ + + tensor_format = getattr(self, "tensor_format", "pd") + values = values.flatten() + + while len(values.shape) < len(broadcast_array.shape): + values = values[..., None] + + return values + + def norm(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.linalg.norm(tensor) + elif tensor_format == "pd": + return paddle.norm(tensor.reshape([tensor.shape[0], -1]), axis=-1).mean() + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def randn_like(self, tensor, generator=None): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + # np.random.randn takes the dimensions as separate arguments, so unpack the shape tuple + return np.random.randn(*np.shape(tensor)) + elif tensor_format == "pd": + # return paddle.randn_like(tensor) + return paddle.randn(tensor.shape) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def zeros_like(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.zeros_like(tensor) + elif tensor_format == "pd": + return paddle.zeros_like(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") diff --git a/modules/image/text_to_image/stable_diffusion_img2img/module.py b/modules/image/text_to_image/stable_diffusion_img2img/module.py new file mode 100755 index 000000000..30361f19e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/module.py @@ -0,0 +1,428 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
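+# This module wraps a Stable Diffusion image-to-image pipeline for PaddleHub: a CLIP text encoder, a
+# UNet2DConditionModel denoiser and an AutoencoderKL VAE, driven by the schedulers vendored above.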
+import argparse +import ast +import base64 +import inspect +import os +import random +import sys +from functools import partial +from io import BytesIO +from typing import List +from typing import Optional + +import numpy as np +import paddle +from docarray import Document +from docarray import DocumentArray +from IPython import display +from PIL import Image +from stable_diffusion_img2img.clip.clip.utils import build_model +from stable_diffusion_img2img.clip.clip.utils import tokenize +from stable_diffusion_img2img.diffusers import AutoencoderKL +from stable_diffusion_img2img.diffusers import DDIMScheduler +from stable_diffusion_img2img.diffusers import LMSDiscreteScheduler +from stable_diffusion_img2img.diffusers import PNDMScheduler +from stable_diffusion_img2img.diffusers import UNet2DConditionModel +from stable_diffusion_img2img.utils import preprocess +from tqdm.auto import tqdm + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="stable_diffusion_img2img", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class StableDiffusionImg2Img: + + def __init__(self): + self.vae = AutoencoderKL(in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", + "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", + "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512) + + self.unet = UNet2DConditionModel(sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8) + + vae_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-vae.pdparams') + unet_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-unet.pdparams') + self.unet.set_dict(paddle.load(unet_path)) + self.vae.set_dict(paddle.load(vae_path)) + for parameter in self.unet.parameters(): + parameter.stop_gradient = True + self.vae.eval() + for parameter in self.vae.parameters(): + parameter.stop_gradient = True + self.unet.eval() + + self.text_encoder = build_model() + for parameter in self.text_encoder.parameters(): + parameter.stop_gradient = True + self.scheduler = PNDMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + skip_prk_steps=True) + + def generate_image(self, + text_prompts, + init_image, + strength: float = 0.8, + style: Optional[str] = None, + artist: Optional[str] = None, + batch_size: Optional[int] = 1, + num_inference_steps=50, + guidance_scale=7.5, + enable_fp16=False, + seed=None, + eta=0.0, + display_rate=5, + use_gpu=True, + output_dir: Optional[str] = 'stable_diffusion_img2img_out'): + """ + Create Stable Diffusion artworks and save the result into a DocumentArray. 
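+        For image-to-image generation, the initial image is first encoded into VAE latents, noised according to `strength`, and then only the remaining int(num_inference_steps * strength) denoising steps are run under text guidance.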
+ + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. + :param init_image: Initial image. + :param strength: Control the noise strength added to initial image, value is in the interval [0.0, 1.0]. The closer to 1, the bigger change to the initial image. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param batch_size: This variable sets the number of still images you want SD to create for each prompt. + :param num_inference_steps: The number of inference steps. + :param guidance_scale: Increase the adherence to the conditional signal which in this case is text as well as overall sample quality. + :param enable_fp16: Whether to use float16. + :param use_gpu: whether to use gpu or not. + :param output_dir: Output directory. + :return: a DocumentArray object that has `n_batches` Documents + """ + if seed: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + text_prompts = [text_prompts] + elif isinstance(text_prompts, list): + for i, prompt in enumerate( + text_prompts): # different from dd here, dd can have multiple prompts for one image with weight. 
+ text_prompts[i] = prompt.rstrip(',.,。') + if style is not None: + text_prompts[i] += ",{}".format(style) + if artist is not None: + text_prompts[i] += ",{},trending on artstation".format(artist) + + if isinstance(init_image, str): + init_image = preprocess(Image.open(init_image)) + else: + init_image = preprocess(init_image) + + # set timesteps + accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) + extra_set_kwargs = {} + offset = 0 + if accepts_offset: + offset = 1 + extra_set_kwargs["offset"] = 1 + + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) + + # encode the init image into latents and scale the latents + init_latents = self.vae.encode(init_image).sample() + init_latents = 0.18215 * init_latents + + # expand init_latents for batch_size + init_latents = paddle.concat([init_latents] * batch_size) + + # get the original timestep using init_timestep + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + if isinstance(self.scheduler, LMSDiscreteScheduler): + timesteps = paddle.to_tensor([num_inference_steps - init_timestep] * batch_size, dtype="int64") + else: + timesteps = self.scheduler.timesteps[-init_timestep] + timesteps = paddle.to_tensor([timesteps] * batch_size, dtype="int64") + + # add noise to latents using the timesteps + noise = paddle.randn(init_latents.shape) + init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. + # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + da_batches = DocumentArray() + + for prompt in text_prompts: + d = Document(tags={'prompt': prompt}) + da_batches.append(d) + for i in range(batch_size): + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': i})) + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': 'merged'})) + with paddle.amp.auto_cast(enable=enable_fp16, level='O2'): + prompts = [prompt] * batch_size + text_input = tokenize(prompts) + text_embeddings = self.text_encoder(text_input) + if do_classifier_free_guidance: + uncond_input = tokenize([""] * batch_size) + uncond_embeddings = self.text_encoder(uncond_input) + text_embeddings = paddle.concat([uncond_embeddings, text_embeddings]) + + latents = init_latents + + t_start = max(num_inference_steps - init_timestep + offset, 0) + for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])): + t_index = t_start + i + # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes. 
+ latent_model_input = (paddle.concat([latents] * 2) if do_classifier_free_guidance else latents) + + if isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[t_index] + latent_model_input = latent_model_input / ((sigma**2 + 1)**0.5) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"] + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = self.scheduler.step(noise_pred, t_index, latents, **extra_step_kwargs)["prev_sample"] + else: + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"] + if i % display_rate == 0: + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-progress.png')) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(merge_image) + d.chunks[-1].chunks.append(c) + display.clear_output(wait=True) + display.display(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(image) + d.chunks[j].chunks.append(c) + + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-merge.png')) + display.clear_output(wait=True) + display.display(merge_image) + d.load_pil_image_to_datauri(merge_image) + d.chunks[-1].load_pil_image_to_datauri(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + image.save(os.path.join(output_dir, f'{prompt}-image-{j}.png')) + d.chunks[j].load_pil_image_to_datauri(image) + return da_batches + + @serving + def serving_method(self, text_prompts, init_image, **kwargs): + """ + Run as a service. + """ + init_image = Image.open(BytesIO(base64.b64decode(init_image))) + results = self.generate_image(text_prompts=text_prompts, init_image=init_image, **kwargs).to_base64() + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + init_image=args.init_image, + strength=args.strength, + style=args.style, + artist=args.artist, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + enable_fp16=args.enable_fp16, + seed=args.seed, + display_rate=args.display_rate, + use_gpu=args.use_gpu, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_input_group.add_argument('--num_inference_steps', + type=int, + default=50, + help="The number of inference steps.") + + self.arg_input_group.add_argument( + '--guidance_scale', + type=float, + default=7.5, + help= + "Increase the adherence to the conditional signal which in this case is text as well as overall sample quality." + ) + + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed." + ) + + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help="During a diffusion run, you can monitor the progress of each image being created with this variable.") + + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + + self.arg_config_group.add_argument('--enable_fp16', + type=ast.literal_eval, + default=False, + help="whether use float16 or not") + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='stable_diffusion_img2img_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + + self.arg_input_group.add_argument('--init_image', type=str, help='Initial image.') + + self.arg_input_group.add_argument( + '--strength', + type=float, + help= + 'Control the noise strength added to initial image, value is in the interval [0.0, 1.0]. The closer to 1, the bigger change to the initial image.' 
+ ) + + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + + self.arg_input_group.add_argument( + '--batch_size', + type=int, + default=1, + help="This variable sets the number of still images you want SD to create for each prompt.") diff --git a/modules/image/text_to_image/stable_diffusion_img2img/requirements.txt b/modules/image/text_to_image/stable_diffusion_img2img/requirements.txt new file mode 100644 index 000000000..45e6baa06 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/requirements.txt @@ -0,0 +1,8 @@ +numpy +ftfy +regex +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/stable_diffusion_img2img/utils.py b/modules/image/text_to_image/stable_diffusion_img2img/utils.py new file mode 100644 index 000000000..b9de4919c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_img2img/utils.py @@ -0,0 +1,16 @@ +import numpy as np +import paddle +import PIL +from PIL import Image + + +def preprocess(image): + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + w, h = image.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = image[None].transpose(0, 3, 1, 2) + image = paddle.to_tensor(image) + return 2.0 * image - 1.0 From ef1d7a6f998fc061160b259cf48504f63489c6c8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 10 Oct 2022 15:35:04 +0800 Subject: [PATCH 092/117] Add stable_diffusion_inpainting module --- .../stable_diffusion_inpainting/LICENSE | 82 + .../stable_diffusion_inpainting/README.md | 179 +++ .../clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../clip/clip/model.py | 259 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../clip/clip/utils.py | 88 + .../diffusers/__init__.py | 20 + .../diffusers/configuration_utils.py | 312 ++++ .../diffusers/models/README.md | 11 + .../diffusers/models/__init__.py | 20 + .../diffusers/models/attention.py | 465 ++++++ .../diffusers/models/embeddings.py | 116 ++ .../diffusers/models/resnet.py | 515 ++++++ .../diffusers/models/unet_2d.py | 206 +++ .../diffusers/models/unet_2d_condition.py | 206 +++ .../diffusers/models/unet_blocks.py | 1428 +++++++++++++++++ .../diffusers/models/vae.py | 465 ++++++ .../diffusers/schedulers/README.md | 18 + .../diffusers/schedulers/__init__.py | 24 + .../diffusers/schedulers/scheduling_ddim.py | 182 +++ .../diffusers/schedulers/scheduling_ddpm.py | 191 +++ .../schedulers/scheduling_karras_ve.py | 124 ++ .../schedulers/scheduling_lms_discrete.py | 133 ++ .../diffusers/schedulers/scheduling_pndm.py | 258 +++ .../diffusers/schedulers/scheduling_sde_ve.py | 172 ++ .../diffusers/schedulers/scheduling_sde_vp.py | 59 + .../diffusers/schedulers/scheduling_utils.py | 102 ++ .../stable_diffusion_inpainting/module.py | 450 ++++++ .../requirements.txt | 8 + .../stable_diffusion_inpainting/utils.py | 31 + 32 files changed, 6444 insertions(+) create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/LICENSE create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/README.md create mode 100755 
modules/image/text_to_image/stable_diffusion_inpainting/clip/README.md create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/layers.py create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/model.py create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/configuration_utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/attention.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/embeddings.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/resnet.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d_condition.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_blocks.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/vae.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddim.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddpm.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_karras_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_lms_discrete.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_pndm.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_vp.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_utils.py create mode 100755 modules/image/text_to_image/stable_diffusion_inpainting/module.py create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/requirements.txt create mode 100644 modules/image/text_to_image/stable_diffusion_inpainting/utils.py diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/LICENSE b/modules/image/text_to_image/stable_diffusion_inpainting/LICENSE new file mode 100644 index 000000000..928aa738f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/LICENSE @@ -0,0 +1,82 @@ +Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors + +CreativeML Open RAIL-M +dated August 22, 2022 + +Section I: PREAMBLE + 
+Multimodal generative models are being widely adopted and used, and have the potential to transform the way artists, among other individuals, conceive and benefit from AI or ML technologies as a tool for content creation. + +Notwithstanding the current and potential benefits that these artifacts can bring to society at large, there are also concerns about potential misuses of them, either due to their technical limitations or ethical considerations. + +In short, this license strives for both the open and responsible downstream use of the accompanying model. When it comes to the open character, we took inspiration from open source permissive licenses regarding the grant of IP rights. Referring to the downstream responsible use, we added use-based restrictions not permitting the use of the Model in very specific scenarios, in order for the licensor to be able to enforce the license in case potential misuses of the Model may occur. At the same time, we strive to promote open and responsible research on generative models for art and content generation. + +Even though downstream derivative versions of the model could be released under different licensing terms, the latter will always have to include - at minimum - the same use-based restrictions as the ones in the original license (this license). We believe in the intersection between open and responsible AI development; thus, this License aims to strike a balance between both in order to enable responsible open-science in the field of AI. + +This License governs the use of the model (and its derivatives) and is informed by the model card associated with the model. + +NOW THEREFORE, You and Licensor agree as follows: + +1. Definitions + +- "License" means the terms and conditions for use, reproduction, and Distribution as defined in this document. +- "Data" means a collection of information and/or content extracted from the dataset used with the Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not licensed under this License. +- "Output" means the results of operating a Model as embodied in informational content resulting therefrom. +- "Model" means any accompanying machine-learning based assemblies (including checkpoints), consisting of learnt weights, parameters (including optimizer states), corresponding to the model architecture as embodied in the Complementary Material, that have been trained or tuned, in whole or in part on the Data, using the Complementary Material. +- "Derivatives of the Model" means all modifications to the Model, works based on the Model, or any other model which is created or initialized by transfer of patterns of the weights, parameters, activations or output of the Model, to the other model, in order to cause the other model to perform similarly to the Model, including - but not limited to - distillation methods entailing the use of intermediate data representations or methods based on the generation of synthetic data by the Model for training the other model. +- "Complementary Material" means the accompanying source code and scripts used to define, run, load, benchmark or evaluate the Model, and used to prepare data for training or evaluation, if any. This includes any accompanying documentation, tutorials, examples, etc, if any. +- "Distribution" means any transmission, reproduction, publication or other sharing of the Model or Derivatives of the Model to a third party, including providing the Model as a hosted service made available by electronic or other remote means - e.g. 
API-based or web access. +- "Licensor" means the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model. +- "You" (or "Your") means an individual or Legal Entity exercising permissions granted by this License and/or making use of the Model for whichever purpose and in any field of use, including usage of the Model in an end-use application - e.g. chatbot, translator, image generator. +- "Third Parties" means individuals or legal entities that are not under common control with Licensor or You. +- "Contribution" means any work of authorship, including the original version of the Model and any modifications or additions to that Model or Derivatives of the Model thereof, that is intentionally submitted to Licensor for inclusion in the Model by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Model, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." +- "Contributor" means Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Model. + +Section II: INTELLECTUAL PROPERTY RIGHTS + +Both copyright and patent grants apply to the Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of the Model are subject to additional terms as described in Section III. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly display, publicly perform, sublicense, and distribute the Complementary Material, the Model, and Derivatives of the Model. +3. Grant of Patent License. Subject to the terms and conditions of this License and where and as applicable, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this paragraph) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Model and the Complementary Material, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Model to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution incorporated within the Model and/or Complementary Material constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for the Model and/or Work shall terminate as of the date such litigation is asserted or filed. + +Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION + +4. Distribution and Redistribution. 
You may host for Third Party remote access purposes (e.g. software-as-a-service), reproduce and distribute copies of the Model or Derivatives of the Model thereof in any medium, with or without modifications, provided that You meet the following conditions: +Use-based restrictions as referenced in paragraph 5 MUST be included as an enforceable provision by You in any type of legal agreement (e.g. a license) governing the use and/or distribution of the Model or Derivatives of the Model, and You shall give notice to subsequent users You Distribute to, that the Model or Derivatives of the Model are subject to paragraph 5. This provision does not apply to the use of Complementary Material. +You must give any Third Party recipients of the Model or Derivatives of the Model a copy of this License; +You must cause any modified files to carry prominent notices stating that You changed the files; +You must retain all copyright, patent, trademark, and attribution notices excluding those notices that do not pertain to any part of the Model, Derivatives of the Model. +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions - respecting paragraph 4.a. - for use, reproduction, or Distribution of Your modifications, or for any such Derivatives of the Model as a whole, provided Your use, reproduction, and Distribution of the Model otherwise complies with the conditions stated in this License. +5. Use-based restrictions. The restrictions set forth in Attachment A are considered Use-based restrictions. Therefore You cannot use the Model and the Derivatives of the Model for the specified restricted uses. You may use the Model subject to this License, including only for lawful purposes and in accordance with the License. Use may include creating any content with, finetuning, updating, running, training, evaluating and/or reparametrizing the Model. You shall require all of Your users who use the Model or a Derivative of the Model to comply with the terms of this paragraph (paragraph 5). +6. The Output You Generate. Except as set forth herein, Licensor claims no rights in the Output You generate using the Model. You are accountable for the Output you generate and its subsequent uses. No use of the output can contravene any provision as stated in the License. + +Section IV: OTHER PROVISIONS + +7. Updates and Runtime Restrictions. To the maximum extent permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage of the Model in violation of this License, update the Model through electronic means, or modify the Output of the Model based on updates. You shall undertake reasonable efforts to use the latest version of the Model. +8. Trademarks and related. Nothing in this License permits You to make use of Licensors’ trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between the parties; and any rights not expressly granted herein are reserved by the Licensors. +9. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Model and the Complementary Material (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Model, Derivatives of the Model, and the Complementary Material and assume any risks associated with Your exercise of permissions under this License. +10. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Model and the Complementary Material (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. +11. Accepting Warranty or Additional Liability. While redistributing the Model, Derivatives of the Model and the Complementary Material thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. +12. If any provision of this License is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein. 
+ +END OF TERMS AND CONDITIONS + + + + +Attachment A + +Use Restrictions + +You agree not to use the Model or Derivatives of the Model: +- In any way that violates any applicable national, federal, state, local or international law or regulation; +- For the purpose of exploiting, harming or attempting to exploit or harm minors in any way; +- To generate or disseminate verifiably false information and/or content with the purpose of harming others; +- To generate or disseminate personal identifiable information that can be used to harm an individual; +- To defame, disparage or otherwise harass others; +- For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation; +- For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics; +- To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm; +- For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories; +- To provide medical advice and medical results interpretation; +- To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use). \ No newline at end of file diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/README.md b/modules/image/text_to_image/stable_diffusion_inpainting/README.md new file mode 100644 index 000000000..82b1ab0e4 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/README.md @@ -0,0 +1,179 @@ +# stable_diffusion_inpainting + +|模型名称|stable_diffusion_inpainting| +| :--- | :---: | +|类别|多模态-文图生成| +|网络|CLIP Text Encoder+UNet+VAD| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|4.0GB| +|最新更新日期|2022-08-26| +|数据指标|-| + +## 一、模型基本信息 + +### 应用效果展示 + + - 输入文本 "a cat sitting on a bench" + + - 输入图像 +

+ +
+ + - 输入mask +

+ +
+ + - 输出图像 +

+ +
+ + - 生成过程 +

+ +
+ +### 模型介绍 + +Stable Diffusion是一种潜在扩散模型(Latent Diffusion), 属于生成类模型,这类模型通过对随机噪声进行一步步地迭代降噪并采样来获得感兴趣的图像,当前取得了令人惊艳的效果。相比于Disco Diffusion, Stable Diffusion通过在低纬度的潜在空间(lower dimensional latent space)而不是原像素空间来做迭代,极大地降低了内存和计算量的需求,并且在V100上一分钟之内即可以渲染出想要的图像,欢迎体验。该模块支持输入文本以及一张图片,一张掩码图片,对掩码部分的内容进行改变。 + +更多详情请参考论文:[High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stable_diffusion_inpainting + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run stable_diffusion_inpainting --text_prompts "a cat sitting on a bench" --init_image /PATH/TO/IMAGE --mask_image /PATH/TO/IMAGE --output_dir stable_diffusion_inpainting_out + ``` + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="stable_diffusion_inpainting") + text_prompts = ["a cat sitting on a bench"] + # 生成图像, 默认会在stable_diffusion_inpainting_out目录保存图像 + # 返回的da是一个DocumentArray对象,保存了所有的结果,包括最终结果和迭代过程的中间结果 + # 可以通过操作DocumentArray对象对生成的图像做后处理,保存或者分析 + # 您可以设置batch_size一次生成多张 + da = module.generate_image(text_prompts=text_prompts, batch_size=2, output_dir='./stable_diffusion_inpainting_out/') + # 展示所有的中间结果 + da[0].chunks[-1].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + # 将整个生成过程保存为一个动态图gif + da[0].chunks[-1].chunks.save_gif('stable_diffusion_inpainting_out-merged-result.gif') + # da索引的是prompt, da[0].chunks索引的是该prompt下生成的第一张图,在batch_size不为1时能同时生成多张图 + # 您也可以按照上述操作显示单张图,如第0张的生成过程 + da[0].chunks[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True) + da[0].chunks[0].chunks.save_gif('stable_diffusion_inpainting-image-0-result.gif') + ``` + +- ### 3、API + + - ```python + def generate_image( + text_prompts, + init_image, + mask_image, + strength: float = 0.8, + width_height: Optional[List[int]] = [512, 512], + seed: Optional[int] = None, + batch_size: Optional[int] = 1, + display_rate: Optional[int] = 5, + output_dir: Optional[str] = 'stable_diffusion_inpainting_out'): + ``` + + - 文图生成API,生成文本描述内容的图像。 + + - **参数** + + - text_prompts(str): 输入的语句,描述想要生成的图像的内容。 + - init_image(str|numpy.ndarray|PIL.Image): 输入的初始图像。 + - mask_image(str|numpy.ndarray|PIL.Image): 输入的掩码图像。 + - strength(float): 控制添加到输入图像的噪声强度,取值范围0到1。越接近1.0,图像变化越大。 + - width_height(Optional[List[int]]): 指定最终输出图像的宽高,宽和高都需要是64的倍数,生成的图像越大,所需要的计算时间越长。 + - seed(Optional[int]): 随机种子,由于输入默认是随机高斯噪声,设置不同的随机种子会由不同的初始输入,从而最终生成不同的结果,可以设置该参数来获得不同的输出图像。 + - batch_size(Optional[int]): 指定每个prompt一次生成的图像的数量。 + - display_rate(Optional[int]): 保存中间结果的频率,默认每5个step保存一次中间结果,如果不需要中间结果来让程序跑的更快,可以将这个值设大。 + - output_dir(Optional[str]): 保存输出图像的目录,默认为"stable_diffusion_out"。 + + + - **返回** + - ra(DocumentArray): DocumentArray对象, 包含`n_batches`个Documents,其中每个Document都保存了迭代过程的所有中间结果。详细可参考[DocumentArray使用文档](https://docarray.jina.ai/fundamentals/documentarray/index.html)。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文图生成服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m stable_diffusion_inpainting + ``` + + - 这样就完成了一个文图生成的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + 
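+  - 例如,如需使用第0号GPU启动服务(此处卡号仅为示例,请按实际环境调整),可先设置环境变量再启动:
+  - ```shell
+    $ export CUDA_VISIBLE_DEVICES=0
+    $ hub serving start -m stable_diffusion_inpainting
+    ```
+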
+- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果,返回的预测结果在反序列化后即是上述接口声明中说明的DocumentArray类型,返回后对结果的操作方式和使用generate_image接口完全相同。 + + - ```python + import requests + import json + import cv2 + import base64 + from docarray import DocumentArray + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()) + + # 发送HTTP请求 + data = {'text_prompts': 'a cat sitting on a bench', 'init_image': cv2_to_base64(cv2.imread('/PATH/TO/IMAGE')), + 'mask_image': cv2_to_base64(cv2.imread('/PATH/TO/IMAGE')} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/stable_diffusion_inpainting" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + r.json()["results"] + da = DocumentArray.from_base64(r.json()["results"]) + # 保存结果图 + da[0].save_uri_to_file('stable_diffusion_inpainting_out.png') + # 将生成过程保存为一个动态图gif + da[0].chunks[0].chunks.save_gif('stable_diffusion_inpainting_out.gif') + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install stable_diffusion_inpainting == 1.0.0 + ``` diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/README.md b/modules/image/text_to_image/stable_diffusion_inpainting/clip/README.md new file mode 100755 index 000000000..9944794f8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We use this repo here for text encoder in stable diffusion. diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/__init__.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/layers.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = 
paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, 
self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/model.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/model.py new file mode 100755 index 000000000..06affcc4b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/model.py @@ -0,0 +1,259 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + 
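+        # stem: three 3x3 convolutions (the first with stride 2) followed by 2x2 average pooling,
+        # replacing the single 7x7 convolution + max pool stem of the original torchvision ResNet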
x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class TextTransformer(nn.Layer): + + def __init__(self, context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, + transformer_layers: int): + super().__init__() + self.context_length = context_length + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def forward(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + 
self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 
+ When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). + """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git 
a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py new file mode 100755 index 000000000..b5d417144 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py @@ -0,0 +1,88 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .model import TextTransformer +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + +MODEL_NAMES = ['VITL14'] + +URL = {'VITL14': os.path.join(os.path.dirname(__file__), 'pre_trained', 'vitl14_textencoder.pdparams')} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + + return result + + +def build_model(name='VITL14'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'VITL14': build_vitl14_language_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + state_dict = model.state_dict() + for key, value in sd.items(): + if key in state_dict: + state_dict[key] = value + model.load_dict(state_dict) + model.eval() + return model + + +def build_vitl14_language_model(): + model = TextTransformer(context_length=77, + vocab_size=49408, + transformer_width=768, + transformer_heads=12, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/__init__.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/__init__.py new file mode 100644 index 000000000..7f41816d7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.2.4" + +from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel + +from .schedulers import (DDIMScheduler, DDPMScheduler, KarrasVeScheduler, PNDMScheduler, SchedulerMixin, + ScoreSdeVeScheduler, LMSDiscreteScheduler) diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/configuration_utils.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/configuration_utils.py new file mode 100644 index 000000000..c90ebd5be --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/configuration_utils.py @@ -0,0 +1,312 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" ConfigMixinuration base class and utilities.""" +import functools +import inspect +import json +import os +import re +from collections import OrderedDict +from typing import Any +from typing import Dict +from typing import Tuple +from typing import Union + +from requests import HTTPError + +from paddlehub.common.logger import logger + +HUGGINGFACE_CO_RESOLVE_ENDPOINT = "HUGGINGFACE_CO_RESOLVE_ENDPOINT" +DIFFUSERS_CACHE = "./caches" + +_re_configuration_file = re.compile(r"config\.(.*)\.json") + + +class ConfigMixin: + r""" + Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as + methods for loading/downloading/saving configurations. + + """ + config_name = "model_config.json" + ignore_for_config = [] + + def register_to_config(self, **kwargs): + if self.config_name is None: + raise NotImplementedError(f"Make sure that {self.__class__} has defined a class name `config_name`") + kwargs["_class_name"] = self.__class__.__name__ + kwargs["_diffusers_version"] = "0.0.1" + + for key, value in kwargs.items(): + try: + setattr(self, key, value) + except AttributeError as err: + logger.error(f"Can't set {key} with value {value} for {self}") + raise err + + if not hasattr(self, "_internal_dict"): + internal_dict = kwargs + else: + previous_dict = dict(self._internal_dict) + internal_dict = {**self._internal_dict, **kwargs} + logger.debug(f"Updating config from {previous_dict} to {internal_dict}") + + self._internal_dict = FrozenDict(internal_dict) + + def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): + """ + Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the + [`~ConfigMixin.from_config`] class method. 
+ + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + kwargs: + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. + """ + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + os.makedirs(save_directory, exist_ok=True) + + # If we save using the predefined names, we can load using `from_config` + output_config_file = os.path.join(save_directory, self.config_name) + + self.to_json_file(output_config_file) + logger.info(f"ConfigMixinuration saved in {output_config_file}") + + @classmethod + def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + + model = cls(**init_dict) + + if return_unused_kwargs: + return model, unused_kwargs + else: + return model + + @classmethod + def get_config_dict(cls, pretrained_model_name_or_path: Union[str, os.PathLike], + **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + user_agent = {"file_type": "config"} + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if cls.config_name is None: + raise ValueError( + "`self.config_name` is not defined. Note that one should not load a config from " + "`ConfigMixin`. 
Please make sure to define `config_name` in a class inheriting from `ConfigMixin`") + + if os.path.isfile(pretrained_model_name_or_path): + config_file = pretrained_model_name_or_path + elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)): + # Load from a PyTorch checkpoint + config_file = os.path.join(pretrained_model_name_or_path, cls.config_name) + elif subfolder is not None and os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name)): + config_file = os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name) + else: + raise EnvironmentError( + f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}.") + else: + try: + # Load from URL or cache if already cached + from huggingface_hub import hf_hub_download + config_file = hf_hub_download( + pretrained_model_name_or_path, + filename=cls.config_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + ) + + except HTTPError as err: + raise EnvironmentError("There was a specific connection error when trying to load" + f" {pretrained_model_name_or_path}:\n{err}") + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to" + " run the library in offline mode at" + " 'https://huggingface.co/docs/diffusers/installation#offline-mode'.") + except EnvironmentError: + raise EnvironmentError( + f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a {cls.config_name} file") + + try: + # Load config dict + config_dict = cls._dict_from_json_file(config_file) + except (json.JSONDecodeError, UnicodeDecodeError): + raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.") + + return config_dict + + @classmethod + def extract_init_dict(cls, config_dict, **kwargs): + expected_keys = set(dict(inspect.signature(cls.__init__).parameters).keys()) + expected_keys.remove("self") + # remove general kwargs if present in dict + if "kwargs" in expected_keys: + expected_keys.remove("kwargs") + # remove keys to be ignored + if len(cls.ignore_for_config) > 0: + expected_keys = expected_keys - set(cls.ignore_for_config) + init_dict = {} + for key in expected_keys: + if key in kwargs: + # overwrite key + init_dict[key] = kwargs.pop(key) + elif key in config_dict: + # use value from config dict + init_dict[key] = config_dict.pop(key) + + unused_kwargs = config_dict.update(kwargs) + + passed_keys = set(init_dict.keys()) + if len(expected_keys - passed_keys) > 0: + logger.warning( + f"{expected_keys - passed_keys} was not found in config. 
Values will be initialized to default values.") + + return init_dict, unused_kwargs + + @classmethod + def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]): + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + return json.loads(text) + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + @property + def config(self) -> Dict[str, Any]: + return self._internal_dict + + def to_json_string(self) -> str: + """ + Serializes this instance to a JSON string. + + Returns: + `str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path: Union[str, os.PathLike]): + """ + Save this instance to a JSON file. + + Args: + json_file_path (`str` or `os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. + """ + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) + + +class FrozenDict(OrderedDict): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + for key, value in self.items(): + setattr(self, key, value) + + self.__frozen = True + + def __delitem__(self, *args, **kwargs): + raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") + + def setdefault(self, *args, **kwargs): + raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") + + def pop(self, *args, **kwargs): + raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") + + def update(self, *args, **kwargs): + raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") + + def __setattr__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setattr__(name, value) + + def __setitem__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setitem__(name, value) + + +def register_to_config(init): + """ + Decorator to apply on the init of classes inheriting from `ConfigMixin` so that all the arguments are automatically + sent to `self.register_for_config`. To ignore a specific argument accepted by the init but that shouldn't be + registered in the config, use the `ignore_for_config` class variable + + Warning: Once decorated, all private arguments (beginning with an underscore) are trashed and not sent to the init! + """ + + @functools.wraps(init) + def inner_init(self, *args, **kwargs): + # Ignore private kwargs in the init. 
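+        # kwargs whose names start with an underscore are treated as private: they are filtered out
+        # here, so they are neither forwarded to the wrapped __init__ nor recorded in the config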
+ init_kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_")} + init(self, *args, **init_kwargs) + if not isinstance(self, ConfigMixin): + raise RuntimeError( + f"`@register_for_config` was applied to {self.__class__.__name__} init method, but this class does " + "not inherit from `ConfigMixin`.") + + ignore = getattr(self, "ignore_for_config", []) + # Get positional arguments aligned with kwargs + new_kwargs = {} + signature = inspect.signature(init) + parameters = { + name: p.default + for i, (name, p) in enumerate(signature.parameters.items()) if i > 0 and name not in ignore + } + for arg, name in zip(args, parameters.keys()): + new_kwargs[name] = arg + + # Then add all kwargs + new_kwargs.update({ + k: init_kwargs.get(k, default) + for k, default in parameters.items() if k not in ignore and k not in new_kwargs + }) + getattr(self, "register_to_config")(**new_kwargs) + + return inner_init diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/README.md b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/README.md new file mode 100644 index 000000000..e786fe518 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/README.md @@ -0,0 +1,11 @@ +# Models + +- Models: Neural network that models $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ (see image below) and is trained end-to-end to denoise a noisy input to an image. Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet + +## API + +TODO(Suraj, Patrick) + +## Examples + +TODO(Suraj, Patrick) diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/__init__.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/__init__.py new file mode 100644 index 000000000..f55cc88a8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/__init__.py @@ -0,0 +1,20 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .unet_2d import UNet2DModel +from .unet_2d_condition import UNet2DConditionModel +from .vae import AutoencoderKL +from .vae import VQModel diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/attention.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/attention.py new file mode 100644 index 000000000..29d0e73a7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/attention.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from inspect import isfunction + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def finfo(dtype): + if dtype == paddle.float32: + return np.finfo(np.float32) + if dtype == paddle.float16: + return np.finfo(np.float16) + if dtype == paddle.float64: + return np.finfo(np.float64) + + +paddle.finfo = finfo + + +class AttentionBlockNew(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. Originally ported from here, but adapted + to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + Uses three q, k, v linear layers to compute attention + """ + + def __init__( + self, + channels, + num_head_channels=None, + num_groups=32, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + + self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 + self.num_head_size = num_head_channels + self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + + # define q,k,v as linear layers + self.query = nn.Linear(channels, channels) + self.key = nn.Linear(channels, channels) + self.value = nn.Linear(channels, channels) + + self.rescale_output_factor = rescale_output_factor + self.proj_attn = nn.Linear(channels, channels) + + def transpose_for_scores(self, projection: paddle.Tensor) -> paddle.Tensor: + new_projection_shape = projection.shape[:-1] + [self.num_heads, -1] + # move heads to 2nd position (B, T, H * D) -> (B, T, H, D) -> (B, H, T, D) + new_projection = projection.reshape(new_projection_shape).transpose([0, 2, 1, 3]) + return new_projection + + def forward(self, hidden_states): + residual = hidden_states + batch, channel, height, width = hidden_states.shape + + # norm + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.reshape([batch, channel, height * width]).transpose([0, 2, 1]) + + # proj to q, k, v + query_proj = self.query(hidden_states) + key_proj = self.key(hidden_states) + value_proj = self.value(hidden_states) + + # transpose + query_states = self.transpose_for_scores(query_proj) + key_states = self.transpose_for_scores(key_proj) + value_states = self.transpose_for_scores(value_proj) + + # get scores + scale = 1 / math.sqrt(math.sqrt(self.channels / self.num_heads)) + attention_scores = paddle.matmul(query_states * scale, key_states * scale, transpose_y=True) + attention_probs = F.softmax(attention_scores.astype("float32"), axis=-1).astype(attention_scores.dtype) + + # compute attention output + context_states = paddle.matmul(attention_probs, value_states) + + context_states = context_states.transpose([0, 2, 1, 3]) + new_context_states_shape = context_states.shape[:-2] + [ + self.channels, + ] + context_states = context_states.reshape(new_context_states_shape) + + # compute next hidden_states + hidden_states = self.proj_attn(context_states) + hidden_states = hidden_states.transpose([0, 2, 1]).reshape([batch, channel, height, width]) + + # res connect and rescale + 
hidden_states = (hidden_states + residual) / self.rescale_output_factor + return hidden_states + + def set_weight(self, attn_layer): + self.group_norm.weight.set_value(attn_layer.norm.weight) + self.group_norm.bias.set_value(attn_layer.norm.bias) + + if hasattr(attn_layer, "q"): + self.query.weight.set_value(attn_layer.q.weight[:, :, 0, 0]) + self.key.weight.set_value(attn_layer.k.weight[:, :, 0, 0]) + self.value.weight.set_value(attn_layer.v.weight[:, :, 0, 0]) + + self.query.bias.set_value(attn_layer.q.bias) + self.key.bias.set_value(attn_layer.k.bias) + self.value.bias.set_value(attn_layer.v.bias) + + self.proj_attn.weight.set_value(attn_layer.proj_out.weight[:, :, 0, 0]) + self.proj_attn.bias.set_value(attn_layer.proj_out.bias) + elif hasattr(attn_layer, "NIN_0"): + self.query.weight.set_value(attn_layer.NIN_0.W.t()) + self.key.weight.set_value(attn_layer.NIN_1.W.t()) + self.value.weight.set_value(attn_layer.NIN_2.W.t()) + + self.query.bias.set_value(attn_layer.NIN_0.b) + self.key.bias.set_value(attn_layer.NIN_1.b) + self.value.bias.set_value(attn_layer.NIN_2.b) + + self.proj_attn.weight.set_value(attn_layer.NIN_3.W.t()) + self.proj_attn.bias.set_value(attn_layer.NIN_3.b) + + self.group_norm.weight.set_value(attn_layer.GroupNorm_0.weight) + self.group_norm.bias.set_value(attn_layer.GroupNorm_0.bias) + else: + qkv_weight = attn_layer.qkv.weight.reshape( + [self.num_heads, 3 * self.channels // self.num_heads, self.channels]) + qkv_bias = attn_layer.qkv.bias.reshape([self.num_heads, 3 * self.channels // self.num_heads]) + + q_w, k_w, v_w = qkv_weight.split(self.channels // self.num_heads, axis=1) + q_b, k_b, v_b = qkv_bias.split(self.channels // self.num_heads, axis=1) + + self.query.weight.set_value(q_w.reshape([-1, self.channels])) + self.key.weight.set_value(k_w.reshape([-1, self.channels])) + self.value.weight.set_value(v_w.reshape([-1, self.channels])) + + self.query.bias.set_value(q_b.flatten()) + self.key.bias.set_value(k_b.flatten()) + self.value.bias.set_value(v_b.flatten()) + + self.proj_attn.weight.set_value(attn_layer.proj.weight[:, :, 0]) + self.proj_attn.bias.set_value(attn_layer.proj.bias) + + +class SpatialTransformer(nn.Layer): + """ + Transformer block for image-like data. First, project the input (aka embedding) and reshape to b, t, d. Then apply + standard transformer action. 
Finally, reshape to image + """ + + def __init__(self, in_channels, n_heads, d_head, depth=1, dropout=0.0, context_dim=None): + super().__init__() + self.n_heads = n_heads + self.d_head = d_head + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, epsilon=1e-6) + + self.proj_in = nn.Conv2D(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + + self.transformer_blocks = nn.LayerList([ + BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim) + for d in range(depth) + ]) + + self.proj_out = nn.Conv2D(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + x = self.proj_in(x) + x = x.transpose([0, 2, 3, 1]).reshape([b, h * w, c]) + for block in self.transformer_blocks: + x = block(x, context=context) + x = x.reshape([b, h, w, c]).transpose([0, 3, 1, 2]) + x = self.proj_out(x) + return x + x_in + + def set_weight(self, layer): + self.norm = layer.norm + self.proj_in = layer.proj_in + self.transformer_blocks = layer.transformer_blocks + self.proj_out = layer.proj_out + + +class BasicTransformerBlock(nn.Layer): + + def __init__(self, dim, n_heads, d_head, dropout=0.0, context_dim=None, gated_ff=True, checkpoint=True): + super().__init__() + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, + dropout=dropout) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention(query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.checkpoint = checkpoint + + def forward(self, x, context=None): + x = self.attn1(self.norm1(x)) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class CrossAttention(nn.Layer): + + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): + super().__init__() + inner_dim = dim_head * heads + context_dim = default(context_dim, query_dim) + + self.scale = dim_head**-0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias_attr=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias_attr=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias_attr=False) + + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) + + def reshape_heads_to_batch_dim(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size, seq_len, head_size, dim // head_size]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size * head_size, seq_len, dim // head_size]) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size // head_size, head_size, seq_len, dim]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size // head_size, seq_len, dim * head_size]) + return tensor + + def forward(self, x, context=None, mask=None): + batch_size, sequence_length, dim = x.shape + + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + q = self.reshape_heads_to_batch_dim(q) + k = 
self.reshape_heads_to_batch_dim(k) + v = self.reshape_heads_to_batch_dim(v) + + sim = paddle.einsum("b i d, b j d -> b i j", q * self.scale, k) + + if exists(mask): + mask = mask.reshape([batch_size, -1]) + max_neg_value = -paddle.finfo(sim.dtype).max + mask = mask[:, None, :].repeat(h, 1, 1) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = F.softmax(sim, axis=-1) + + out = paddle.einsum("b i j, b j d -> b i d", attn, v) + out = self.reshape_batch_dim_to_heads(out) + return self.to_out(out) + + +class FeedForward(nn.Layer): + + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +# feedforward +class GEGLU(nn.Layer): + + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, axis=-1) + return x * F.gelu(gate) + + +# TODO(Patrick) - remove once all weights have been converted -> not needed anymore then +class NIN(nn.Layer): + + def __init__(self, in_dim, num_units, init_scale=0.1): + super().__init__() + self.W = self.create_parameter(shape=[in_dim, num_units], default_initializer=nn.initializer.Constant(0.)) + self.b = self.create_parameter(shape=[ + num_units, + ], + is_bias=True, + default_initializer=nn.initializer.Constant(0.)) + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# the main attention block that is used for all models +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=None, + num_groups=32, + encoder_channels=None, + overwrite_qkv=False, + overwrite_linear=False, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + if num_head_channels is None: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + self.qkv = nn.Conv1D(channels, channels * 3, 1) + self.n_heads = self.num_heads + self.rescale_output_factor = rescale_output_factor + + if encoder_channels is not None: + self.encoder_kv = nn.Conv1D(encoder_channels, channels * 2, 1) + + self.proj = nn.Conv1D(channels, channels, 1) + + self.overwrite_qkv = overwrite_qkv + self.overwrite_linear = overwrite_linear + + if overwrite_qkv: + in_channels = channels + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.q = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + elif self.overwrite_linear: + num_groups = min(channels // 4, 32) + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.NIN_0 = NIN(channels, channels) + self.NIN_1 = NIN(channels, channels) + self.NIN_2 = NIN(channels, channels) + self.NIN_3 = NIN(channels, channels) + + self.GroupNorm_0 = nn.GroupNorm(num_groups=num_groups, num_channels=channels, epsilon=1e-6) + else: + self.proj_out = nn.Conv1D(channels, channels, 1) + self.set_weights(self) + + self.is_overwritten = False + + def set_weights(self, layer): + if self.overwrite_qkv: + qkv_weight = paddle.concat([layer.q.weight, layer.k.weight, layer.v.weight], axis=0)[:, :, :, 0] + qkv_bias = paddle.concat([layer.q.bias, layer.k.bias, layer.v.bias], axis=0) + + self.qkv.weight.set_value(qkv_weight) + self.qkv.bias.set_value(qkv_bias) + + proj_out = nn.Conv1D(self.channels, self.channels, 1) + proj_out.weight.set_value(layer.proj_out.weight[:, :, :, 0]) + proj_out.bias.set_value(layer.proj_out.bias) + + self.proj = proj_out + elif self.overwrite_linear: + self.qkv.weight.set_value( + paddle.concat([self.NIN_0.W.t(), self.NIN_1.W.t(), self.NIN_2.W.t()], axis=0)[:, :, None]) + self.qkv.bias.set_value(paddle.concat([self.NIN_0.b, self.NIN_1.b, self.NIN_2.b], axis=0)) + + self.proj.weight.set_value(self.NIN_3.W.t()[:, :, None]) + self.proj.bias.set_value(self.NIN_3.b) + + self.norm.weight.set_value(self.GroupNorm_0.weight) + self.norm.bias.set_value(self.GroupNorm_0.bias) + else: + self.proj.weight.set_value(self.proj_out.weight) + self.proj.bias.set_value(self.proj_out.bias) + + def forward(self, x, encoder_out=None): + if not self.is_overwritten and (self.overwrite_qkv or self.overwrite_linear): + self.set_weights(self) + self.is_overwritten = True + + b, c, *spatial = x.shape + hid_states = self.norm(x).reshape([b, c, -1]) + + qkv = self.qkv(hid_states) + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape([bs * self.n_heads, ch * 3, length]).split(ch, axis=1) + + if encoder_out is not None: + 
encoder_kv = self.encoder_kv(encoder_out) + assert encoder_kv.shape[1] == self.n_heads * ch * 2 + ek, ev = encoder_kv.reshape([bs * self.n_heads, ch * 2, -1]).split(ch, axis=1) + k = paddle.concat([ek, k], axis=-1) + v = paddle.concat([ev, v], axis=-1) + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = F.softmax(weight.astype("float32"), axis=-1).astype(weight.dtype) + + a = paddle.einsum("bts,bcs->bct", weight, v) + h = a.reshape([bs, -1, length]) + + h = self.proj(h) + h = h.reshape([b, c, *spatial]) + + result = x + h + + result = result / self.rescale_output_factor + + return result diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/embeddings.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/embeddings.py new file mode 100644 index 000000000..3e826193b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/embeddings.py @@ -0,0 +1,116 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def get_timestep_embedding(timesteps, + embedding_dim, + flip_sin_to_cos=False, + downscale_freq_shift=1, + scale=1, + max_period=10000): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the + embeddings. :return: an [N x dim] Tensor of positional embeddings. 
+ """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * paddle.arange(start=0, end=half_dim, dtype="float32") + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = paddle.exp(exponent) + emb = timesteps[:, None].astype("float32") * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = paddle.concat([paddle.sin(emb), paddle.cos(emb)], axis=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = paddle.concat([emb[:, half_dim:], emb[:, :half_dim]], axis=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = paddle.concat(emb, paddle.zeros([emb.shape[0], 1]), axis=-1) + return emb + + +class TimestepEmbedding(nn.Layer): + + def __init__(self, channel, time_embed_dim, act_fn="silu"): + super().__init__() + + self.linear_1 = nn.Linear(channel, time_embed_dim) + self.act = None + if act_fn == "silu": + self.act = nn.Silu() + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) + + def forward(self, sample): + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + return sample + + +class Timesteps(nn.Layer): + + def __init__(self, num_channels, flip_sin_to_cos, downscale_freq_shift): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + ) + return t_emb + + +class GaussianFourierProjection(nn.Layer): + """Gaussian Fourier embeddings for noise levels.""" + + def __init__(self, embedding_size=256, scale=1.0): + super().__init__() + self.register_buffer("weight", paddle.randn((embedding_size, )) * scale) + + # to delete later + self.register_buffer("W", paddle.randn((embedding_size, )) * scale) + + self.weight = self.W + + def forward(self, x): + x = paddle.log(x) + x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi + out = paddle.concat([paddle.sin(x_proj), paddle.cos(x_proj)], axis=-1) + return out diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/resnet.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/resnet.py new file mode 100644 index 000000000..944bc11cd --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/resnet.py @@ -0,0 +1,515 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def pad_new(x, pad, mode="constant", value=0): + new_pad = [] + for _ in range(x.ndim * 2 - len(pad)): + new_pad.append(0) + ndim = list(range(x.ndim - 1, 0, -1)) + axes_start = {} + for i, _pad in enumerate(pad): + if _pad < 0: + new_pad.append(0) + zhengshu, yushu = divmod(i, 2) + if yushu == 0: + axes_start[ndim[zhengshu]] = -_pad + else: + new_pad.append(_pad) + + padded = paddle.nn.functional.pad(x, new_pad, mode=mode, value=value) + padded_shape = paddle.shape(padded) + axes = [] + starts = [] + ends = [] + for k, v in axes_start.items(): + axes.append(k) + starts.append(v) + ends.append(padded_shape[k]) + assert v < padded_shape[k] + + if axes: + return padded.slice(axes=axes, starts=starts, ends=ends) + else: + return padded + + +class Upsample2D(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv_transpose: + conv = nn.Conv2DTranspose(channels, self.out_channels, 4, 2, 1) + elif use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, padding=1) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.conv = conv + else: + self.Conv2d_0 = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(x) + + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if self.use_conv: + if self.name == "conv": + x = self.conv(x) + else: + x = self.Conv2d_0(x) + + return x + + +class Downsample2D(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. 
+ """ + + def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2D(kernel_size=stride, stride=stride) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.Conv2d_0 = conv + self.conv = conv + elif name == "Conv2d_0": + self.conv = conv + else: + self.conv = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + x = pad_new(x, pad, mode="constant", value=0) + + assert x.shape[1] == self.channels + x = self.conv(x) + + return x + + +class FirUpsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.use_conv = use_conv + self.fir_kernel = fir_kernel + self.out_channels = out_channels + + def _upsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `upsample_2d()` followed by `Conv2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, + outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same datatype as + `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Setup filter kernel. + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + + if self.use_conv: + convH = w.shape[2] + convW = w.shape[3] + inC = w.shape[1] + + p = (k.shape[0] - factor) - (convW - 1) + + stride = (factor, factor) + # Determine data dimensions. + stride = [1, 1, factor, factor] + output_shape = ((x.shape[2] - 1) * factor + convH, (x.shape[3] - 1) * factor + convW) + output_padding = ( + output_shape[0] - (x.shape[2] - 1) * stride[0] - convH, + output_shape[1] - (x.shape[3] - 1) * stride[1] - convW, + ) + assert output_padding[0] >= 0 and output_padding[1] >= 0 + inC = w.shape[1] + num_groups = x.shape[1] // inC + + # Transpose weights. 
+ w = paddle.reshape(w, (num_groups, -1, inC, convH, convW)) + w = w[..., ::-1, ::-1].transpose([0, 2, 1, 3, 4]) + w = paddle.reshape(w, (num_groups * inC, -1, convH, convW)) + + x = F.conv2d_transpose(x, w, stride=stride, output_padding=output_padding, padding=0) + + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2 + factor - 1, p // 2 + 1)) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + h = self._upsample_2d(x, self.Conv2d_0.weight, k=self.fir_kernel) + h = h + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + h = self._upsample_2d(x, k=self.fir_kernel, factor=2) + + return h + + +class FirDownsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.fir_kernel = fir_kernel + self.use_conv = use_conv + self.out_channels = out_channels + + def _downsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `Conv2d()` followed by `downsample_2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. w: Weight tensor of the shape `[filterH, + filterW, inChannels, outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // + numGroups`. k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * + factor`, which corresponds to average pooling. factor: Integer downsampling factor (default: 2). gain: + Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or `[N, H // factor, W // factor, C]`, and same + datatype as `x`. 
+ """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + + if self.use_conv: + _, _, convH, convW = w.shape + p = (k.shape[0] - factor) + (convW - 1) + s = [factor, factor] + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2, p // 2)) + x = F.conv2d(x, w, stride=s, padding=0) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + x = self._downsample_2d(x, w=self.Conv2d_0.weight, k=self.fir_kernel) + x = x + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + x = self._downsample_2d(x, k=self.fir_kernel, factor=2) + + return x + + +class ResnetBlock(nn.Layer): + + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity="swish", + time_embedding_norm="default", + kernel=None, + output_scale_factor=1.0, + use_nin_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = nn.GroupNorm(num_groups=groups, num_channels=in_channels, epsilon=eps) + + self.conv1 = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = nn.GroupNorm(num_groups=groups_out, num_channels=out_channels, epsilon=eps) + self.dropout = nn.Dropout(dropout) + self.conv2 = nn.Conv2D(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.Silu() + + self.upsample = self.downsample = None + if self.up: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.upsample = lambda x: upsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.upsample = partial(F.interpolate, scale_factor=2.0, mode="nearest") + else: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.downsample = lambda x: downsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.downsample = partial(F.avg_pool2d, kernel_size=2, stride=2) + else: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_nin_shortcut = self.in_channels != self.out_channels if use_nin_shortcut is None else use_nin_shortcut + + self.conv_shortcut = None + if self.use_nin_shortcut: + self.conv_shortcut = nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, temb, hey=False): + h = x + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm1(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + if self.upsample is not None: + x = 
self.upsample(x) + h = self.upsample(h) + elif self.downsample is not None: + x = self.downsample(x) + h = self.downsample(h) + + h = self.conv1(h) + + if temb is not None: + temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None] + h = h + temb + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm2(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + h = self.dropout(h) + h = self.conv2(h) + + if self.conv_shortcut is not None: + x = self.conv_shortcut(x) + + out = (x + h) / self.output_scale_factor + + return out + + +class Mish(nn.Layer): + + def forward(self, x): + return x * F.tanh(F.softplus(x)) + + +def upsample_2d(x, k=None, factor=2, gain=1): + r"""Upsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given + filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified + `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is a: + multiple of the upsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` + """ + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + +def downsample_2d(x, k=None, factor=2, gain=1): + r"""Downsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and downsamples each image with the + given filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the + specified `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its + shape is a multiple of the downsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). 
+ + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + +def upfirdn2d_native(input, kernel, up=1, down=1, pad=(0, 0)): + up_x = up_y = up + down_x = down_y = down + pad_x0 = pad_y0 = pad[0] + pad_x1 = pad_y1 = pad[1] + + _, channel, in_h, in_w = input.shape + input = input.reshape([-1, in_h, in_w, 1]) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.reshape([-1, in_h, 1, in_w, 1, minor]) + # TODO + out = pad_new(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.reshape([-1, in_h * up_y, in_w * up_x, minor]) + + out = pad_new(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[:, max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.transpose([0, 3, 1, 2]) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = paddle.flip(kernel, [0, 1]).reshape([1, 1, kernel_h, kernel_w]) + out = F.conv2d(out, w) + out = out.reshape( + [-1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1]) + out = out.transpose([0, 2, 3, 1]) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.reshape([-1, channel, out_h, out_w]) diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d.py new file mode 100644 index 000000000..11316a819 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
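# A small shape sketch (assumption, not part of the patch) for the FIR resampling
# helpers defined in resnet.py above. With the default kernel k = [1] * factor,
# upsample_2d doubles H and W (nearest-neighbour style) and downsample_2d halves
# them (average-pooling style); the import path is illustrative.
import paddle
from diffusers.models.resnet import upsample_2d, downsample_2d

x = paddle.randn([1, 4, 16, 16])
y = upsample_2d(x, factor=2)       # -> [1, 4, 32, 32]
z = downsample_2d(y, factor=2)     # -> [1, 4, 16, 16]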
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import GaussianFourierProjection +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class UNet2DModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=None, + in_channels=3, + out_channels=3, + center_input_sample=False, + time_embedding_type="positional", + freq_shift=0, + flip_sin_to_cos=True, + down_block_types=("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D"), + block_out_channels=(224, 448, 672, 896), + layers_per_block=2, + mid_block_scale_factor=1, + downsample_padding=1, + act_fn="silu", + attention_head_dim=8, + norm_num_groups=32, + norm_eps=1e-5, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + if time_embedding_type == "fourier": + self.time_proj = GaussianFourierProjection(embedding_size=block_out_channels[0], scale=16) + timestep_input_dim = 2 * block_out_channels[0] + elif time_embedding_type == "positional": + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + ) + 
self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 32) + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=num_groups_out, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, sample: paddle.Tensor, timestep: Union[paddle.Tensor, float, int]) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + skip_sample = sample + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "skip_conv"): + sample, res_samples, skip_sample = downsample_block(hidden_states=sample, + temb=emb, + skip_sample=skip_sample) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb) + + # 5. up + skip_sample = None + for upsample_block in self.up_blocks: + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "skip_conv"): + sample, skip_sample = upsample_block(sample, res_samples, emb, skip_sample) + else: + sample = upsample_block(sample, res_samples, emb) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + if skip_sample is not None: + sample += skip_sample + + if self.config.time_embedding_type == "fourier": + timesteps = timesteps.reshape((sample.shape[0], *([1] * len(sample.shape[1:])))) + sample = sample / timesteps + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d_condition.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d_condition.py new file mode 100644 index 000000000..897491b2f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_2d_condition.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
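# An illustrative forward pass (assumption, not part of the patch) through the
# unconditional UNet2DModel defined above, using its default configuration; the
# model returns a prediction with the same shape as its input.
import paddle
from diffusers.models.unet_2d import UNet2DModel

model = UNet2DModel()                          # default configuration from the patch
x = paddle.randn([1, 3, 64, 64])               # a noisy sample
noise_pred = model(x, timestep=10)["sample"]   # same shape as x: [1, 3, 64, 64]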
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2DCrossAttn + + +class UNet2DConditionModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = 
nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=norm_num_groups, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward( + self, + sample: paddle.Tensor, + timestep: Union[paddle.Tensor, float, int], + encoder_hidden_states: paddle.Tensor, + ) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + + if hasattr(downsample_block, "attentions") and downsample_block.attentions is not None: + sample, res_samples = downsample_block(hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb, encoder_hidden_states=encoder_hidden_states) + + # 5. up + for upsample_block in self.up_blocks: + + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "attentions") and upsample_block.attentions is not None: + sample = upsample_block( + hidden_states=sample, + temb=emb, + res_hidden_states_tuple=res_samples, + encoder_hidden_states=encoder_hidden_states, + ) + else: + sample = upsample_block(hidden_states=sample, temb=emb, res_hidden_states_tuple=res_samples) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_blocks.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_blocks.py new file mode 100644 index 000000000..684a2a43d --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/unet_blocks.py @@ -0,0 +1,1428 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
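# An illustrative call (assumption, not part of the patch) to the text-conditional
# UNet2DConditionModel defined above, as it is used inside the Stable Diffusion
# denoising loop: 4-channel latents plus CLIP text features of width 768.
import paddle
from diffusers.models.unet_2d_condition import UNet2DConditionModel

unet = UNet2DConditionModel()                    # default Stable Diffusion UNet config
latents = paddle.randn([1, 4, 64, 64])           # VAE latents at 1/8 image resolution
text_emb = paddle.randn([1, 77, 768])            # CLIP encoder hidden states
noise_pred = unet(latents, timestep=50, encoder_hidden_states=text_emb)["sample"]
# noise_pred has the same shape as `latents`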
+import numpy as np +import paddle +import paddle.nn as nn + +from .attention import AttentionBlockNew +from .attention import SpatialTransformer +from .resnet import Downsample2D +from .resnet import FirDownsample2D +from .resnet import FirUpsample2D +from .resnet import ResnetBlock +from .resnet import Upsample2D + + +def get_down_block( + down_block_type, + num_layers, + in_channels, + out_channels, + temb_channels, + add_downsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, + downsample_padding=None, +): + down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type + if down_block_type == "DownBlock2D": + return DownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnDownBlock2D": + return AttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "CrossAttnDownBlock2D": + if cross_attention_dim is None: + raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "SkipDownBlock2D": + return SkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnSkipDownBlock2D": + return AttnSkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "DownEncoderBlock2D": + return DownEncoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + + +def get_up_block( + up_block_type, + num_layers, + in_channels, + out_channels, + prev_output_channel, + temb_channels, + add_upsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, +): + up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type + if up_block_type == "UpBlock2D": + return UpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "CrossAttnUpBlock2D": + if cross_attention_dim is None: 
+ raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "AttnUpBlock2D": + return AttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "SkipUpBlock2D": + return SkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "AttnSkipUpBlock2D": + return AttnSkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "UpDecoderBlock2D": + return UpDecoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + raise ValueError(f"{up_block_type} does not exist.") + + +class UNetMidBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + AttentionBlockNew( + in_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in 
zip(self.attentions, self.resnets[1:]): + if self.attention_type == "default": + hidden_states = attn(hidden_states) + else: + hidden_states = attn(hidden_states, encoder_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class UNetMidBlock2DCrossAttn(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + cross_attention_dim=1280, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + SpatialTransformer( + in_channels, + attn_num_head_channels, + in_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states, encoder_hidden_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class AttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, 
+ out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class CrossAttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + 
Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnDownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.downsamplers is not None: + for downsampler 
in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnSkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class SkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, 
+ )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class AttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attention_type="default", + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class CrossAttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + 
resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, encoder_hidden_states=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet in self.resnets: + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if 
self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnUpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnSkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + upsample_padding=1, + add_upsample=True, + ): + 
super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(resnet_in_channels + res_skip_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + hidden_states = self.attentions[0](hidden_states) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample + + +class SkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_upsample=True, + upsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + 
groups=min((resnet_in_channels + res_skip_channels) // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/vae.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/vae.py new file mode 100644 index 000000000..59e35b0fb --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/models/vae.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
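+# Usage sketch for the VAE defined in this file (illustrative only; the tensor shapes
+# assume a hypothetical batch `x` of RGB images with shape [N, 3, 512, 512]):
+#
+#     vae = AutoencoderKL()          # defaults below mirror the Stable Diffusion v1 config
+#     posterior = vae.encode(x)      # DiagonalGaussianDistribution over the latents
+#     z = posterior.sample()         # latent sample of shape [N, 4, 64, 64]
+#     recon = vae.decode(z)          # decoded image of shape [N, 3, 512, 512]
+#
+# The 0.18215 latent scaling used by the Stable Diffusion pipelines is applied by the
+# caller, not by this module.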
+import numpy as np +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class Encoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + double_z=True, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.down_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=self.layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=not is_final_block, + resnet_eps=1e-6, + downsample_padding=0, + resnet_act_fn=act_fn, + attn_num_head_channels=None, + temb_channels=None, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[-1], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = nn.Conv2D(block_out_channels[-1], conv_out_channels, 3, padding=1) + + def forward(self, x): + sample = x + sample = self.conv_in(sample) + + # down + for down_block in self.down_blocks: + sample = down_block(sample) + + # middle + sample = self.mid_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class Decoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + prev_output_channel=None, + add_upsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + 
attn_num_head_channels=None, + temb_channels=None, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, z): + sample = z + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample) + + # up + for up_block in self.up_blocks: + sample = up_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class VectorQuantizer(nn.Layer): + """ + Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix + multiplications and allows for post-hoc remapping of indices. + """ + + # NOTE: due to a bug the beta term was applied to the wrong term. for + # backwards compatibility we use the buggy version by default, but you can + # specify legacy=False to fix it. + def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True): + super().__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + self.legacy = legacy + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + self.remap = remap + if self.remap is not None: + self.register_buffer("used", paddle.to_tensor(np.load(self.remap))) + self.re_embed = self.used.shape[0] + self.unknown_index = unknown_index # "random" or "extra" or integer + if self.unknown_index == "extra": + self.unknown_index = self.re_embed + self.re_embed = self.re_embed + 1 + print(f"Remapping {self.n_e} indices to {self.re_embed} indices. 
" + f"Using {self.unknown_index} for unknown indices.") + else: + self.re_embed = n_e + + self.sane_index_shape = sane_index_shape + + def remap_to_used(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + match = (inds[:, :, None] == used[None, None, ...]).astype("int64") + new = match.argmax(-1) + unknown = match.sum(2) < 1 + if self.unknown_index == "random": + new[unknown] = paddle.randint(0, self.re_embed, shape=new[unknown].shape) + else: + new[unknown] = self.unknown_index + return new.reshape(ishape) + + def unmap_to_all(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + if self.re_embed > self.used.shape[0]: # extra token + inds[inds >= self.used.shape[0]] = 0 # simply set to zero + back = paddle.gather(used[None, :][inds.shape[0] * [0], :], inds, axis=1) + return back.reshape(ishape) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.transpose([0, 2, 3, 1]) + z_flattened = z.reshape([-1, self.e_dim]) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = (paddle.sum(z_flattened**2, axis=1, keepdim=True) + paddle.sum(self.embedding.weight**2, axis=1) - + 2 * paddle.einsum("bd,dn->bn", z_flattened, self.embedding.weight.t())) + + min_encoding_indices = paddle.argmin(d, axis=1) + z_q = self.embedding(min_encoding_indices).reshape(z.shape) + perplexity = None + min_encodings = None + + # compute loss for embedding + if not self.legacy: + loss = self.beta * paddle.mean((z_q.detach() - z)**2) + paddle.mean((z_q - z.detach())**2) + else: + loss = paddle.mean((z_q.detach() - z)**2) + self.beta * paddle.mean((z_q - z.detach())**2) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + if self.remap is not None: + min_encoding_indices = min_encoding_indices.reshape([z.shape[0], -1]) # add batch axis + min_encoding_indices = self.remap_to_used(min_encoding_indices) + min_encoding_indices = min_encoding_indices.reshape([-1, 1]) # flatten + + if self.sane_index_shape: + min_encoding_indices = min_encoding_indices.reshape([z_q.shape[0], z_q.shape[2], z_q.shape[3]]) + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + if self.remap is not None: + indices = indices.reshape([shape[0], -1]) # add batch axis + indices = self.unmap_to_all(indices) + indices = indices.flatten() # flatten again + + # get quantized latent vectors + z_q = self.embedding(indices) + + if shape is not None: + z_q = z_q.reshape(shape) + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + return z_q + + +class DiagonalGaussianDistribution(object): + + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = paddle.chunk(parameters, 2, axis=1) + self.logvar = paddle.clip(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = paddle.exp(0.5 * self.logvar) + self.var = paddle.exp(self.logvar) + if self.deterministic: + self.var = self.std = paddle.zeros_like(self.mean) + + def sample(self): + x = self.mean + self.std * paddle.randn(self.mean.shape) + return x + + def kl(self, other=None): + if self.deterministic: + return paddle.to_tensor([0.0]) + else: + if other is None: + return 0.5 * 
paddle.sum(paddle.pow(self.mean, 2) + self.var - 1.0 - self.logvar, axis=[1, 2, 3]) + else: + return 0.5 * paddle.sum( + paddle.pow(self.mean - other.mean, 2) / other.var + self.var / other.var - 1.0 - self.logvar + + other.logvar, + axis=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return paddle.to_tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * paddle.sum(logtwopi + self.logvar + paddle.pow(sample - self.mean, 2) / self.var, axis=dims) + + def mode(self): + return self.mean + + +class VQModel(ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=1, + act_fn="silu", + latent_channels=3, + sample_size=32, + num_vq_embeddings=256, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=False, + ) + + self.quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + self.quantize = VectorQuantizer(num_vq_embeddings, + latent_channels, + beta=0.25, + remap=None, + sane_index_shape=False) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def forward(self, sample): + x = sample + h = self.encode(x) + dec = self.decode(h) + return dec + + +class AutoencoderKL(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=True, + ) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + self.quant_conv = nn.Conv2D(2 * latent_channels, 2 * latent_channels, 1) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, 
sample, sample_posterior=False):
+        x = sample
+        posterior = self.encode(x)
+        if sample_posterior:
+            z = posterior.sample()
+        else:
+            z = posterior.mode()
+        dec = self.decode(z)
+        return dec
diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/README.md b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/README.md
new file mode 100644
index 000000000..40f50f232
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/README.md
@@ -0,0 +1,18 @@
+# Schedulers
+
+- Schedulers are the algorithms used to run diffusion models at inference time as well as during training. They include the noise schedules and define algorithm-specific diffusion steps.
+- Schedulers can be used interchangeably between diffusion models at inference time to find the preferred trade-off between speed and generation quality.
+- Schedulers are available in numpy, but can easily be transformed into PyTorch.
+
+## API
+
+- Schedulers should provide one or more `def step(...)` functions that are called iteratively to unroll the diffusion loop during
+the forward pass.
+- Schedulers should be framework-agnostic, but provide simple functionality to convert the scheduler into a specific framework, such as PyTorch,
+with a `set_format(...)` method.
+
+## Examples
+
+- The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py).
+- The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py).
+- The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py).
diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/__init__.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/__init__.py
new file mode 100644
index 000000000..cebc3e618
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/__init__.py
@@ -0,0 +1,24 @@
+# flake8: noqa
+# There's no way to ignore "F401 '...' imported but unused" warnings in this
+# module, but to preserve other warnings. So, don't check this module at all.
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .scheduling_ddim import DDIMScheduler +from .scheduling_ddpm import DDPMScheduler +from .scheduling_karras_ve import KarrasVeScheduler +from .scheduling_lms_discrete import LMSDiscreteScheduler +from .scheduling_pndm import PNDMScheduler +from .scheduling_sde_ve import ScoreSdeVeScheduler +from .scheduling_sde_vp import ScoreSdeVpScheduler +from .scheduling_utils import SchedulerMixin diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddim.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddim.py new file mode 100644 index 000000000..ebe362d99 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddim.py @@ -0,0 +1,182 @@ +# Copyright 2022 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pypaddle_diffusion +# and https://github.com/hojonathanho/diffusion +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDIMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + clip_sample=True, + set_alpha_to_one=True, + tensor_format="pd", + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this paratemer simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = np.array(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def _get_variance(self, timestep, prev_timestep): + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.timesteps += offset + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator=None, + ): + # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointingc to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. 
compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + + # 4. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance**(0.5) + + if use_clipped_model_output: + # the model_output is always re-derived from the clipped x_0 in Glide + model_output = (sample - alpha_prod_t**(0.5) * pred_original_sample) / beta_prod_t**(0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2)**(0.5) * model_output + + # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + prev_sample = alpha_prod_t_prev**(0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + noise = paddle.randn(model_output.shape) + variance = self._get_variance(timestep, prev_timestep)**(0.5) * eta * noise + + if not paddle.is_tensor(model_output): + variance = variance.numpy() + + prev_sample = prev_sample + variance + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddpm.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddpm.py new file mode 100644 index 000000000..34551b2ad --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_ddpm.py @@ -0,0 +1,191 @@ +# Copyright 2022 UC Berkely Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
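+# Usage sketch (assumptions: `model` is a trained noise-prediction network and `sample`
+# starts as pure Gaussian noise; neither is defined in this file):
+#
+#     scheduler = DDPMScheduler(num_train_timesteps=1000)
+#     scheduler.set_timesteps(1000)
+#     for t in scheduler.timesteps:
+#         noise_pred = model(sample, t)
+#         sample = scheduler.step(noise_pred, t, sample)["prev_sample"]
+#
+# At training time, `add_noise` below implements the forward process
+# x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise.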
+# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDPMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + variance_type="fixed_small", + clip_sample=True, + tensor_format="pd", + ): + + if trained_betas is not None: + self.betas = np.asarray(trained_betas) + elif beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
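+            # "scaled_linear" draws the betas linearly in sqrt-space and then squares them,
+            # i.e. beta_t = (sqrt(beta_start) + t / (T - 1) * (sqrt(beta_end) - sqrt(beta_start)))**2,
+            # which front-loads smaller betas than a plain linear ramp.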
+ self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + self.one = np.array(1.0) + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + self.variance_type = variance_type + + def set_timesteps(self, num_inference_steps): + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.set_format(tensor_format=self.tensor_format) + + def _get_variance(self, t, predicted_variance=None, variance_type=None): + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probs added for training stability + if variance_type == "fixed_small": + variance = self.clip(variance, min_value=1e-20) + # for rl-diffuser https://arxiv.org/abs/2205.09991 + elif variance_type == "fixed_small_log": + variance = self.log(self.clip(variance, min_value=1e-20)) + elif variance_type == "fixed_large": + variance = self.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = self.log(self.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = self.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + predict_epsilon=True, + generator=None, + ): + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + model_output, predicted_variance = paddle.split(model_output, sample.shape[1], axis=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + if predict_epsilon: + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + else: + pred_original_sample = model_output + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 4. 
Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample_coeff = (alpha_prod_t_prev**(0.5) * self.betas[t]) / beta_prod_t + current_sample_coeff = self.alphas[t]**(0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + noise = self.randn_like(model_output) + variance = (self._get_variance(t, predicted_variance=predicted_variance)**0.5) * noise + + pred_prev_sample = pred_prev_sample + variance + + return {"prev_sample": pred_prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_karras_ve.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_karras_ve.py new file mode 100644 index 000000000..36827564e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_karras_ve.py @@ -0,0 +1,124 @@ +# Copyright 2022 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class KarrasVeScheduler(SchedulerMixin, ConfigMixin): + """ + Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and + the VE column of Table 1 from [1] for reference. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic + differential equations." https://arxiv.org/abs/2011.13456 + """ + + @register_to_config + def __init__( + self, + sigma_min=0.02, + sigma_max=100, + s_noise=1.007, + s_churn=80, + s_min=0.05, + s_max=50, + tensor_format="pd", + ): + """ + For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of + Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. 
The grid search values used to find the + optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper. + + Args: + sigma_min (`float`): minimum noise magnitude + sigma_max (`float`): maximum noise magnitude + s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling. + A reasonable range is [1.000, 1.011]. + s_churn (`float`): the parameter controlling the overall amount of stochasticity. + A reasonable range is [0, 100]. + s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity). + A reasonable range is [0, 10]. + s_max (`float`): the end value of the sigma range where we add noise. + A reasonable range is [0.2, 80]. + """ + # setable values + self.num_inference_steps = None + self.timesteps = None + self.schedule = None # sigma(t_i) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.schedule = [(self.sigma_max * (self.sigma_min**2 / self.sigma_max**2)**(i / (num_inference_steps - 1))) + for i in self.timesteps] + self.schedule = np.array(self.schedule, dtype=np.float32) + + self.set_format(tensor_format=self.tensor_format) + + def add_noise_to_input(self, sample, sigma, generator=None): + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a + higher noise level sigma_hat = sigma_i + gamma_i*sigma_i. + """ + if self.s_min <= sigma <= self.s_max: + gamma = min(self.s_churn / self.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + eps = self.s_noise * paddle.randn(sample.shape) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2)**0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + return {"prev_sample": sample_prev, "derivative": derivative} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + sample_prev: Union[paddle.Tensor, np.ndarray], + derivative: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + return {"prev_sample": sample_prev, "derivative": derivative_corr} + + def add_noise(self, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_lms_discrete.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_lms_discrete.py new file mode 100644 index 000000000..2ed63cc2c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_lms_discrete.py @@ -0,0 +1,133 @@ +# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved. 
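+# Implementation note: the LMS sampler treats denoising as an ODE in sigma and advances it
+# with a linear multistep rule, x_{t+1} = x_t + sum_j c_j * d_{t-j}, where the d's are the
+# stored derivatives and each coefficient c_j comes from integrating the corresponding
+# Lagrange basis polynomial over [sigma_t, sigma_{t+1}] (see `get_lms_coefficient` below).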
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle +from scipy import integrate + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + tensor_format="pd", + ): + """ + Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by + Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181 + """ + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5 + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self.derivatives = [] + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def get_lms_coefficient(self, order, t, current_order): + """ + Compute a linear multistep coefficient + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.linspace(self.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) + + low_idx = np.floor(self.timesteps).astype(int) + high_idx = np.ceil(self.timesteps).astype(int) + frac = np.mod(self.timesteps, 1.0) + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5) + sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] + self.sigmas = np.concatenate([sigmas, [0.0]]) + + self.derivatives = [] + + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + order: int = 4, + ): + sigma = self.sigmas[timestep] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + pred_original_sample = sample - sigma * model_output + + # 2. 
Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + self.derivatives.append(derivative) + if len(self.derivatives) > order: + self.derivatives.pop(0) + + # 3. Compute linear multistep coefficients + order = min(timestep + 1, order) + lms_coeffs = [self.get_lms_coefficient(order, timestep, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum(coeff * derivative + for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives))) + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + alpha_prod = self.alphas_cumprod[timesteps] + alpha_prod = self.match_shape(alpha_prod, original_samples) + + noisy_samples = (alpha_prod**0.5) * original_samples + ((1 - alpha_prod)**0.5) * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_pndm.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_pndm.py new file mode 100644 index 000000000..12abd9cfe --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_pndm.py @@ -0,0 +1,258 @@ +# Copyright 2022 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
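+
+    In this implementation alpha_bar(t) is fixed to the cosine schedule
+    cos((t + 0.008) / 1.008 * pi / 2) ** 2, so for T timesteps each entry is
+    beta_i = min(1 - alpha_bar((i + 1) / T) / alpha_bar(i / T), max_beta).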
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class PNDMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + tensor_format="pd", + skip_prk_steps=False, + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.one = np.array(1.0) + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.cur_model_output = 0 + self.counter = 0 + self.cur_sample = None + self.ets = [] + + # setable values + self.num_inference_steps = None + self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self._offset = 0 + self.prk_timesteps = None + self.plms_timesteps = None + self.timesteps = None + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self._timesteps = list( + range(0, self.config.num_train_timesteps, self.config.num_train_timesteps // num_inference_steps)) + self._offset = offset + self._timesteps = [t + self._offset for t in self._timesteps] + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results. 
When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + self.prk_timesteps = [] + self.plms_timesteps = list(reversed(self._timesteps[:-1] + self._timesteps[-2:-1] + self._timesteps[-1:])) + else: + prk_timesteps = np.array(self._timesteps[-self.pndm_order:]).repeat(2) + np.tile( + np.array([0, self.config.num_train_timesteps // num_inference_steps // 2]), self.pndm_order) + self.prk_timesteps = list(reversed(prk_timesteps[:-1].repeat(2)[1:-1])) + self.plms_timesteps = list(reversed(self._timesteps[:-3])) + + self.timesteps = self.prk_timesteps + self.plms_timesteps + + self.ets = [] + self.counter = 0 + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps: + return self.step_prk(model_output=model_output, timestep=timestep, sample=sample) + else: + return self.step_plms(model_output=model_output, timestep=timestep, sample=sample) + + def step_prk( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the + solution to the differential equation. + """ + diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2 + prev_timestep = max(timestep - diff_to_prev, self.prk_timesteps[-1]) + timestep = self.prk_timesteps[self.counter // 4 * 4] + + if self.counter % 4 == 0: + self.cur_model_output += 1 / 6 * model_output + self.ets.append(model_output) + self.cur_sample = sample + elif (self.counter - 1) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 2) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 3) % 4 == 0: + model_output = self.cur_model_output + 1 / 6 * model_output + self.cur_model_output = 0 + + # cur_sample should not be `None` + cur_sample = self.cur_sample if self.cur_sample is not None else sample + + prev_sample = self._get_prev_sample(cur_sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def step_plms( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple + times to approximate the solution. 
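+
+        The update combines the cached noise predictions in `self.ets` with the
+        classical Adams-Bashforth linear multistep coefficients, e.g.
+        (55 * e_t - 59 * e_{t-1} + 37 * e_{t-2} - 9 * e_{t-3}) / 24 once four
+        estimates are available, which is the fourth-order case handled below.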
+ """ + if not self.config.skip_prk_steps and len(self.ets) < 3: + raise ValueError( + f"{self.__class__} can only be run AFTER scheduler has been run " + "in 'prk' mode for at least 12 iterations " + "See: https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py " + "for more information.") + + prev_timestep = max(timestep - self.config.num_train_timesteps // self.num_inference_steps, 0) + + if self.counter != 1: + self.ets.append(model_output) + else: + prev_timestep = timestep + timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps + + if len(self.ets) == 1 and self.counter == 0: + model_output = model_output + self.cur_sample = sample + elif len(self.ets) == 1 and self.counter == 1: + model_output = (model_output + self.ets[-1]) / 2 + sample = self.cur_sample + self.cur_sample = None + elif len(self.ets) == 2: + model_output = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + model_output = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + model_output = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def _get_prev_sample(self, sample, timestep, timestep_prev, model_output): + # See formula (9) of PNDM paper https://arxiv.org/pdf/2202.09778.pdf + # this function computes x_(t−δ) using the formula of (9) + # Note that x_t needs to be added to both sides of the equation + + # Notation ( -> + # alpha_prod_t -> α_t + # alpha_prod_t_prev -> α_(t−δ) + # beta_prod_t -> (1 - α_t) + # beta_prod_t_prev -> (1 - α_(t−δ)) + # sample -> x_t + # model_output -> e_θ(x_t, t) + # prev_sample -> x_(t−δ) + alpha_prod_t = self.alphas_cumprod[timestep + 1 - self._offset] + alpha_prod_t_prev = self.alphas_cumprod[timestep_prev + 1 - self._offset] + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t)) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t)**(0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev**(0.5) + (alpha_prod_t * beta_prod_t * + alpha_prod_t_prev)**(0.5) + + # full formula (9) + prev_sample = (sample_coeff * sample - + (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff) + + return prev_sample + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_ve.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_ve.py new file mode 100644 index 000000000..92ca23d5b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_ve.py 
@@ -0,0 +1,172 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pypaddle +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): + """ + The variance exploding stochastic differential equation (SDE) scheduler. + + :param snr: coefficient weighting the step from the model_output sample (from the network) to the random noise. + :param sigma_min: initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the + distribution of the data. + :param sigma_max: :param sampling_eps: the end value of sampling, where timesteps decrease progessively from 1 to + epsilon. :param correct_steps: number of correction steps performed on a produced sample. :param tensor_format: + "np" or "pd" for the expected format of samples passed to the Scheduler. + """ + + @register_to_config + def __init__( + self, + num_train_timesteps=2000, + snr=0.15, + sigma_min=0.01, + sigma_max=1348, + sampling_eps=1e-5, + correct_steps=1, + tensor_format="pd", + ): + # self.sigmas = None + # self.discrete_sigmas = None + # + # # setable values + # self.num_inference_steps = None + self.timesteps = None + + self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, sampling_eps=None): + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.timesteps = np.linspace(1, sampling_eps, num_inference_steps) + elif tensor_format == "pd": + self.timesteps = paddle.linspace(1, sampling_eps, num_inference_steps) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_sigmas(self, num_inference_steps, sigma_min=None, sigma_max=None, sampling_eps=None): + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if self.timesteps is None: + self.set_timesteps(num_inference_steps, sampling_eps) + + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.discrete_sigmas = np.exp(np.linspace(np.log(sigma_min), np.log(sigma_max), num_inference_steps)) + self.sigmas = np.array([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + elif tensor_format == "pd": + self.discrete_sigmas = 
paddle.exp(paddle.linspace(np.log(sigma_min), np.log(sigma_max), + num_inference_steps)) + self.sigmas = paddle.to_tensor([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def get_adjacent_sigma(self, timesteps, t): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.where(timesteps == 0, np.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + elif tensor_format == "pd": + return paddle.where(timesteps == 0, paddle.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_seed(self, seed): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + np.random.seed(seed) + elif tensor_format == "pd": + paddle.seed(seed) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def step_pred( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Predict the sample at the previous timestep by reversing the SDE. + """ + if seed is not None: + self.set_seed(seed) + # TODO(Patrick) non-Pypaddle + + timestep = timestep * paddle.ones(sample.shape[0]) # paddle.repeat_interleave(timestep, sample.shape[0]) + timesteps = (timestep * (len(self.timesteps) - 1)).astype("int64") + + sigma = self.discrete_sigmas[timesteps] + adjacent_sigma = self.get_adjacent_sigma(timesteps, timestep) + drift = self.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2)**0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + drift = drift - diffusion[:, None, None, None]**2 * model_output + + # equation 6: sample noise for the diffusion term of + noise = self.randn_like(sample) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion[:, None, None, None] * noise # add impact of diffusion field g + + return {"prev_sample": prev_sample, "prev_sample_mean": prev_sample_mean} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Correct the predicted sample based on the output model_output of the network. This is often run repeatedly + after making the prediction for the previous timestep. + """ + if seed is not None: + self.set_seed(seed) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. 
of z" + # sample noise for correction + noise = self.randn_like(sample) + + # compute step size from the model_output, the noise, and the snr + grad_norm = self.norm(model_output) + noise_norm = self.norm(noise) + step_size = (self.config.snr * noise_norm / grad_norm)**2 * 2 + step_size = step_size * paddle.ones(sample.shape[0]) + # self.repeat_scalar(step_size, sample.shape[0]) + + # compute corrected sample: model_output term and noise term + prev_sample_mean = sample + step_size[:, None, None, None] * model_output + prev_sample = prev_sample_mean + ((step_size * 2)**0.5)[:, None, None, None] * noise + + return {"prev_sample": prev_sample} + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_vp.py b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_vp.py new file mode 100644 index 000000000..8ad84c73e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_sde_vp.py @@ -0,0 +1,59 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): + + self.sigmas = None + self.discrete_sigmas = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps): + self.timesteps = paddle.linspace(1, self.config.sampling_eps, num_inference_steps) + + def step_pred(self, score, x, t): + # TODO(Patrick) better comments + non-PyTorch + # postprocess model score + log_mean_coeff = (-0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min) + std = paddle.sqrt(1.0 - paddle.exp(2.0 * log_mean_coeff)) + score = -score / std[:, None, None, None] + + # compute + dt = -1.0 / len(self.timesteps) + + beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) + drift = -0.5 * beta_t[:, None, None, None] * x + diffusion = paddle.sqrt(beta_t) + drift = drift - diffusion[:, None, None, None]**2 * score + x_mean = x + drift * dt + + # add noise + noise = self.randn_like(x) + x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise + + return x, x_mean + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_utils.py 
b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_utils.py new file mode 100644 index 000000000..dc3cbde5a --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/diffusers/schedulers/scheduling_utils.py @@ -0,0 +1,102 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +class SchedulerMixin: + + config_name = SCHEDULER_CONFIG_NAME + ignore_for_config = ["tensor_format"] + + def set_format(self, tensor_format="pd"): + self.tensor_format = tensor_format + if tensor_format == "pd": + for key, value in vars(self).items(): + if isinstance(value, np.ndarray): + setattr(self, key, paddle.to_tensor(value)) + + return self + + def clip(self, tensor, min_value=None, max_value=None): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.clip(tensor, min_value, max_value) + elif tensor_format == "pd": + return paddle.clip(tensor, min_value, max_value) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def log(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.log(tensor) + elif tensor_format == "pd": + return paddle.log(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def match_shape(self, values: Union[np.ndarray, paddle.Tensor], broadcast_array: Union[np.ndarray, paddle.Tensor]): + """ + Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. + + Args: + values: an array or tensor of values to extract. + broadcast_array: an array with a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + Returns: + a tensor of shape [batch_size, 1, ...] where the shape has K dims. 
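+
+            Example (shapes only): `values` of shape [batch_size] matched against a
+            broadcast_array of shape [batch_size, C, H, W] is returned with shape
+            [batch_size, 1, 1, 1].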
+ """ + + tensor_format = getattr(self, "tensor_format", "pd") + values = values.flatten() + + while len(values.shape) < len(broadcast_array.shape): + values = values[..., None] + + return values + + def norm(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.linalg.norm(tensor) + elif tensor_format == "pd": + return paddle.norm(tensor.reshape([tensor.shape[0], -1]), axis=-1).mean() + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def randn_like(self, tensor, generator=None): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.random.randn(np.shape(tensor)) + elif tensor_format == "pd": + # return paddle.randn_like(tensor) + return paddle.randn(tensor.shape) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def zeros_like(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.zeros_like(tensor) + elif tensor_format == "pd": + return paddle.zeros_like(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/module.py b/modules/image/text_to_image/stable_diffusion_inpainting/module.py new file mode 100755 index 000000000..919ce6eae --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/module.py @@ -0,0 +1,450 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
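+#
+# Minimal usage sketch (the prompt and file paths below are placeholders; it
+# assumes the module has been installed and is loaded through PaddleHub):
+#
+#     import paddlehub as hub
+#     module = hub.Module(name="stable_diffusion_inpainting")
+#     module.generate_image(text_prompts="a red brick fireplace",
+#                           init_image="init.png",
+#                           mask_image="mask.png",
+#                           output_dir="stable_diffusion_inpainting_out")
+#
+# The call returns a DocumentArray and also writes intermediate and final images
+# into `output_dir`.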
+import argparse +import ast +import base64 +import inspect +import os +import random +import sys +from functools import partial +from io import BytesIO +from typing import List +from typing import Optional + +import numpy as np +import paddle +from docarray import Document +from docarray import DocumentArray +from IPython import display +from PIL import Image +from stable_diffusion_inpainting.clip.clip.utils import build_model +from stable_diffusion_inpainting.clip.clip.utils import tokenize +from stable_diffusion_inpainting.diffusers import AutoencoderKL +from stable_diffusion_inpainting.diffusers import DDIMScheduler +from stable_diffusion_inpainting.diffusers import LMSDiscreteScheduler +from stable_diffusion_inpainting.diffusers import PNDMScheduler +from stable_diffusion_inpainting.diffusers import UNet2DConditionModel +from stable_diffusion_inpainting.utils import preprocess +from stable_diffusion_inpainting.utils import preprocess_mask +from tqdm.auto import tqdm + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="stable_diffusion_inpainting", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class StableDiffusionInpainting: + + def __init__(self): + self.vae = AutoencoderKL(in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", + "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", + "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512) + + self.unet = UNet2DConditionModel(sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8) + + vae_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-vae.pdparams') + unet_path = os.path.join(self.directory, 'pre_trained', 'stable-diffusion-v1-4-unet.pdparams') + self.unet.set_dict(paddle.load(unet_path)) + self.vae.set_dict(paddle.load(vae_path)) + for parameter in self.unet.parameters(): + parameter.stop_gradient = True + self.vae.eval() + for parameter in self.vae.parameters(): + parameter.stop_gradient = True + self.unet.eval() + + self.text_encoder = build_model() + for parameter in self.text_encoder.parameters(): + parameter.stop_gradient = True + self.scheduler = PNDMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + skip_prk_steps=True) + + def generate_image(self, + text_prompts, + init_image, + mask_image, + strength: float = 0.8, + style: Optional[str] = None, + artist: Optional[str] = None, + batch_size: Optional[int] = 1, + num_inference_steps=50, + guidance_scale=7.5, + enable_fp16=False, + seed=None, + eta=0.0, + display_rate=5, + use_gpu=True, + output_dir: Optional[str] = 'stable_diffusion_inpainting_out'): + """ + Create Stable Diffusion artworks 
and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. + :param init_image: Initial image. + :param mask_image: Mask image. + :param strength: Control the noise strength added to initial image, value is in the interval [0.0, 1.0]. The closer to 1, the bigger change to the initial image. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param batch_size: This variable sets the number of still images you want SD to create for each prompt. + :param num_inference_steps: The number of inference steps. + :param guidance_scale: Increase the adherence to the conditional signal which in this case is text as well as overall sample quality. + :param enable_fp16: Whether to use float16. + :param use_gpu: whether to use gpu or not. + :param output_dir: Output directory. + :return: a DocumentArray object that has `n_batches` Documents + """ + if seed: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + text_prompts = [text_prompts] + elif isinstance(text_prompts, list): + for i, prompt in enumerate( + text_prompts): # different from dd here, dd can have multiple prompts for one image with weight. 
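+                # same handling as the single-string branch above: strip trailing
+                # punctuation, then optionally append the style / artist suffixes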
+ text_prompts[i] = prompt.rstrip(',.,。') + if style is not None: + text_prompts[i] += ",{}".format(style) + if artist is not None: + text_prompts[i] += ",{},trending on artstation".format(artist) + + if isinstance(init_image, str): + init_image = preprocess(Image.open(init_image)) + else: + init_image = preprocess(init_image) + + if isinstance(mask_image, str): + mask_image = preprocess_mask(Image.open(mask_image)) + else: + mask_image = preprocess_mask(mask_image) + + # set timesteps + accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) + extra_set_kwargs = {} + offset = 0 + if accepts_offset: + offset = 1 + extra_set_kwargs["offset"] = 1 + + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) + + # encode the init image into latents and scale the latents + init_latents = self.vae.encode(init_image).sample() + init_latents = 0.18215 * init_latents + + # prepare init_latents noise to latents + init_latents = paddle.concat([init_latents] * batch_size) + init_latents_orig = init_latents + + mask = paddle.concat([mask_image] * batch_size) + + # check sizes + if not mask.shape == init_latents.shape: + raise ValueError(f"The mask and init_image should be the same size!") + + # get the original timestep using init_timestep + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + if isinstance(self.scheduler, LMSDiscreteScheduler): + timesteps = paddle.to_tensor([num_inference_steps - init_timestep] * batch_size, dtype="int64") + else: + timesteps = self.scheduler.timesteps[-init_timestep] + timesteps = paddle.to_tensor([timesteps] * batch_size, dtype="int64") + + # add noise to latents using the timesteps + noise = paddle.randn(init_latents.shape) + init_latents = self.scheduler.add_noise(init_latents, noise, timesteps) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + da_batches = DocumentArray() + + for prompt in text_prompts: + d = Document(tags={'prompt': prompt}) + da_batches.append(d) + for i in range(batch_size): + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': i})) + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': 'merged'})) + with paddle.amp.auto_cast(enable=enable_fp16, level='O2'): + prompts = [prompt] * batch_size + text_input = tokenize(prompts) + text_embeddings = self.text_encoder(text_input) + if do_classifier_free_guidance: + uncond_input = tokenize([""] * batch_size) + uncond_embeddings = self.text_encoder(uncond_input) + text_embeddings = paddle.concat([uncond_embeddings, text_embeddings]) + + latents = init_latents + + t_start = max(num_inference_steps - init_timestep + offset, 0) + for i, t in tqdm(enumerate(self.scheduler.timesteps[t_start:])): + t_index = t_start + i + # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes. + latent_model_input = (paddle.concat([latents] * 2) if do_classifier_free_guidance else latents) + + if isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[t_index] + latent_model_input = latent_model_input / ((sigma**2 + 1)**0.5) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"] + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = self.scheduler.step(noise_pred, t_index, latents, **extra_step_kwargs)["prev_sample"] + # masking + init_latents_proper = self.scheduler.add_noise(init_latents_orig, noise, t_index) + else: + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"] + # masking + init_latents_proper = self.scheduler.add_noise(init_latents_orig, noise, t) + latents = (init_latents_proper * mask) + (latents * (1 - mask)) + if i % display_rate == 0: + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-progress.png')) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(merge_image) + d.chunks[-1].chunks.append(c) + display.clear_output(wait=True) + display.display(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(image) + d.chunks[j].chunks.append(c) + + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = 
Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt}-merge.png')) + display.clear_output(wait=True) + display.display(merge_image) + d.load_pil_image_to_datauri(merge_image) + d.chunks[-1].load_pil_image_to_datauri(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + image.save(os.path.join(output_dir, f'{prompt}-image-{j}.png')) + d.chunks[j].load_pil_image_to_datauri(image) + return da_batches + + @serving + def serving_method(self, text_prompts, init_image, mask_image, **kwargs): + """ + Run as a service. + """ + init_image = Image.open(BytesIO(base64.b64decode(init_image))) + mask_image = Image.open(BytesIO(base64.b64decode(mask_image))) + results = self.generate_image(text_prompts=text_prompts, init_image=init_image, mask_image=mask_image, + **kwargs).to_base64() + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + enable_fp16=args.enable_fp16, + seed=args.seed, + display_rate=args.display_rate, + use_gpu=args.use_gpu, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_input_group.add_argument('--num_inference_steps', + type=int, + default=50, + help="The number of inference steps.") + + self.arg_input_group.add_argument( + '--guidance_scale', + type=float, + default=7.5, + help= + "Increase the adherence to the conditional signal which in this case is text as well as overall sample quality." + ) + + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed." + ) + + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help="During a diffusion run, you can monitor the progress of each image being created with this variable.") + + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + + self.arg_config_group.add_argument('--enable_fp16', + type=ast.literal_eval, + default=False, + help="whether use float16 or not") + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='stable_diffusion_inpainting_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. 
The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + + self.arg_input_group.add_argument('--init_image', type=str, help='Initial image.') + + self.arg_input_group.add_argument('--mask_image', type=str, help='Mask image.') + + self.arg_input_group.add_argument( + '--strength', + type=float, + help= + 'Control the noise strength added to initial image, value is in the interval [0.0, 1.0]. The closer to 1, the bigger change to the initial image.' + ) + + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + + self.arg_input_group.add_argument( + '--batch_size', + type=int, + default=1, + help="This variable sets the number of still images you want SD to create for each prompt.") diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/requirements.txt b/modules/image/text_to_image/stable_diffusion_inpainting/requirements.txt new file mode 100644 index 000000000..45e6baa06 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/requirements.txt @@ -0,0 +1,8 @@ +numpy +ftfy +regex +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/utils.py b/modules/image/text_to_image/stable_diffusion_inpainting/utils.py new file mode 100644 index 000000000..c245d73c1 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_inpainting/utils.py @@ -0,0 +1,31 @@ +import numpy as np +import paddle +import PIL +from PIL import Image + + +def preprocess(image): + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + w, h = image.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = np.array(image).astype(np.float32) / 255.0 + image = image[None].transpose(0, 3, 1, 2) + image = paddle.to_tensor(image) + return 2.0 * image - 1.0 + + +def preprocess_mask(mask): + if isinstance(mask, np.ndarray): + mask = Image.fromarray(mask) + mask = mask.convert("L") + w, h = mask.size + w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 + mask = mask.resize((w // 8, h // 8), resample=PIL.Image.NEAREST) + mask = np.array(mask).astype(np.float32) / 255.0 + mask = np.tile(mask, (4, 1, 1)) + mask = mask[None].transpose(0, 1, 2, 3) + mask = 1 - mask # repaint white, keep black + mask = paddle.to_tensor(mask) + return mask From 
d8525ace9d1f9ffeb1378130ada0f4d48e8bc395 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 14 Oct 2022 14:04:06 +0800 Subject: [PATCH 093/117] update ernie_vilg version (#2073) * update ernie_vilg version * modify default style * update readme --- .../image/text_to_image/ernie_vilg/README.md | 22 +++---- .../image/text_to_image/ernie_vilg/module.py | 63 +++++++++++++------ 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md index 823d859b1..36b33d613 100755 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -21,7 +21,7 @@ |数据集|-| |是否支持Fine-tuning|否| |模型大小|-| -|最新更新日期|2022-08-02| +|最新更新日期|2022-10-14| |数据指标|-| ### 应用效果展示 @@ -78,7 +78,8 @@ - ```python def generate_image( text_prompts:str, - style: Optional[str] = "油画", + style: Optional[str] = "探索无限", + resolution: Optional[str] = "1024*1024", topk: Optional[int] = 6, output_dir: Optional[str] = 'ernievilg_output') ``` @@ -88,7 +89,9 @@ - **参数** - text_prompts(str): 输入的语句,描述想要生成的图像的内容。 - - style(Optional[str]): 生成图像的风格,当前支持'油画','水彩','粉笔画','卡通','儿童画','蜡笔画','探索无限'。 + - style(Optional[str]): 生成图像的风格,当前支持 古风、油画、水彩、卡通、二次元、浮世绘、蒸汽波艺术、 + low poly、像素风格、概念艺术、未来主义、赛博朋克、写实风格、洛丽塔风格、巴洛克风格、超现实主义、探索无限。 + - resolution(Optional[str]): 生成图像的分辨率,当前支持 '1024\*1024', '1024\*1536', '1536\*1024',默认为'1024\*1024'。 - topk(Optional[int]): 保存前多少张图,最多保存6张。 - output_dir(Optional[str]): 保存输出图像的目录,默认为"ernievilg_output"。 @@ -100,14 +103,6 @@ ## 四、 Prompt 指南 -(在 GitHub 阅读 README 的用户可以访问 www.youpromptme.cn 获得更好的阅读体验) - -(图片比较多,完全加载页面可能需要 3min) - -作者:佳祥 (LCL-Brew) - -原文地址: https://github.com/OleNet/YouPromptMe/tree/gh-pages/you-prompt-me - 这是一份如何调整 Prompt 得到更漂亮的图片的经验性文档。我们的结果和经验都来源于[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 和[社区的资料](#related-work)。 @@ -797,6 +792,9 @@ DiscoDiffusion Prompt 技巧资料:https://docs.google.com/document/d/1l8s7uS2 初始发布 +* 1.1.0 + + 增加分辨率参数以及所支持的风格 ```shell - $ hub install ernie_vilg == 1.0.0 + $ hub install ernie_vilg == 1.1.0 ``` diff --git a/modules/image/text_to_image/ernie_vilg/module.py b/modules/image/text_to_image/ernie_vilg/module.py index f812c60cc..f08ae13a2 100755 --- a/modules/image/text_to_image/ernie_vilg/module.py +++ b/modules/image/text_to_image/ernie_vilg/module.py @@ -21,7 +21,7 @@ @moduleinfo(name="ernie_vilg", - version="1.0.0", + version="1.1.0", type="image/text_to_image", summary="", author="baidu-nlp", @@ -64,7 +64,8 @@ def _apply_token(self, ak, sk): def generate_image(self, text_prompts, - style: Optional[str] = "油画", + style: Optional[str] = "探索无限", + resolution: Optional[str] = "1024*1024", topk: Optional[int] = 6, visualization: Optional[bool] = True, output_dir: Optional[str] = 'ernievilg_output'): @@ -72,7 +73,9 @@ def generate_image(self, Create image by text prompts using ErnieVilG model. :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. - :param style: Image stype, currently supported 油画、水彩、粉笔画、卡通、儿童画、蜡笔画、探索无限。 + :param style: Image stype, currently supported 古风、油画、水彩、卡通、二次元、浮世绘、蒸汽波艺术、 + low poly、像素风格、概念艺术、未来主义、赛博朋克、写实风格、洛丽塔风格、巴洛克风格、超现实主义、探索无限。 + :param resolution: Resolution of images, currently supported "1024*1024", "1024*1536", "1536*1024". :param topk: Top k images to save. :param visualization: Whether to save images or not. 
:output_dir: Output directory @@ -91,7 +94,8 @@ def generate_image(self, data={ 'access_token': token, "text": text_prompt, - "style": style + "style": style, + "resolution": resolution }) res = res.json() if res['code'] == 4001: @@ -113,7 +117,8 @@ def generate_image(self, data={ 'access_token': token, "text": text_prompt, - "style": style + "style": style, + "resolution": resolution }) res = res.json() if res['code'] != 0: @@ -128,11 +133,31 @@ def generate_image(self, start_time = time.time() process_bar = tqdm(total=100, unit='%') results = {} - first_iter = True + total_time = 60 * len(taskids) while True: + end_time = time.time() + duration = end_time - start_time + progress_rate = int((duration) / total_time * 100) + if not taskids: + progress_rate = 100 + if progress_rate > process_bar.n: + if progress_rate >= 100: + if not taskids: + increase_rate = 100 - process_bar.n + else: + increase_rate = 0 + else: + increase_rate = progress_rate - process_bar.n + else: + increase_rate = 0 + process_bar.update(increase_rate) + if duration < 30: + time.sleep(5) + continue + else: + time.sleep(6) if not taskids: break - total_time = 0 has_done = [] for taskid in taskids: res = requests.post(get_url, @@ -177,17 +202,6 @@ def generate_image(self, else: print(res['msg']) raise RuntimeError(res['msg']) - total_time = int(re.match('[0-9]+', str(res['data']['waiting'])).group(0)) * 60 - end_time = time.time() - progress_rate = int(((end_time - start_time) / total_time * 100)) if total_time != 0 else 100 - if progress_rate > process_bar.n: - increase_rate = progress_rate - process_bar.n - if progress_rate >= 100: - increase_rate = 100 - process_bar.n - else: - increase_rate = 0 - process_bar.update(increase_rate) - time.sleep(5) for taskid in has_done: taskids.remove(taskid) print('Saving Images...') @@ -228,6 +242,7 @@ def run_cmd(self, argvs): self.token = self._apply_token(self.ak, self.sk) results = self.generate_image(text_prompts=args.text_prompts, style=args.style, + resolution=args.resolution, topk=args.topk, visualization=args.visualization, output_dir=args.output_dir) @@ -254,9 +269,17 @@ def add_module_input_arg(self): self.arg_input_group.add_argument('--text_prompts', type=str) self.arg_input_group.add_argument('--style', type=str, - default='油画', - choices=['油画', '水彩', '粉笔画', '卡通', '儿童画', '蜡笔画', '探索无限'], + default='探索无限', + choices=[ + '古风', '油画', '水彩', '卡通', '二次元', '浮世绘', '蒸汽波艺术', 'low poly', '像素风格', '概念艺术', + '未来主义', '赛博朋克', '写实风格', '洛丽塔风格', '巴洛克风格', '超现实主义', '探索无限' + ], help="绘画风格") + self.arg_input_group.add_argument('--resolution', + type=str, + default='1024*1024', + choices=['1024*1024', '1024*1536', '1536*1024'], + help="图像分辨率") self.arg_input_group.add_argument('--topk', type=int, default=6, help="选取保存前多少张图,最多10张") self.arg_input_group.add_argument('--ak', type=str, default=None, help="申请文心api使用token的ak") self.arg_input_group.add_argument('--sk', type=str, default=None, help="申请文心api使用token的sk") From fb41acf6db2fb762a5ac2c7720a1a66a9679bdf5 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:07:22 +0800 Subject: [PATCH 094/117] update mobilenet_v2_animals (#2016) --- .../mobilenet_v2_animals/README.md | 7 +- .../mobilenet_v2_animals/README_en.md | 7 +- .../mobilenet_v2_animals/data_feed.py | 1 - .../mobilenet_v2_animals/mobilenet_v2.py | 182 ------------------ .../mobilenet_v2_animals/module.py | 110 +++-------- .../mobilenet_v2_animals/processor.py | 1 - .../mobilenet_v2_animals/test.py | 76 ++++++++ 7 files changed, 112 insertions(+), 272 
deletions(-) delete mode 100644 modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py create mode 100644 modules/image/classification/mobilenet_v2_animals/test.py diff --git a/modules/image/classification/mobilenet_v2_animals/README.md b/modules/image/classification/mobilenet_v2_animals/README.md index e1ba58dcd..37349a98d 100644 --- a/modules/image/classification/mobilenet_v2_animals/README.md +++ b/modules/image/classification/mobilenet_v2_animals/README.md @@ -129,6 +129,11 @@ * 1.0.0 初始发布 + +* 1.1.0 + + 移除 Fluid API + - ```shell - $ hub install mobilenet_v2_animals==1.0.0 + $ hub install mobilenet_v2_animals==1.1.0 ``` diff --git a/modules/image/classification/mobilenet_v2_animals/README_en.md b/modules/image/classification/mobilenet_v2_animals/README_en.md index 9f0835948..4d883efa0 100644 --- a/modules/image/classification/mobilenet_v2_animals/README_en.md +++ b/modules/image/classification/mobilenet_v2_animals/README_en.md @@ -130,6 +130,11 @@ * 1.0.0 First release + +* 1.1.0 + + Remove Fluid API + - ```shell - $ hub install mobilenet_v2_animals==1.0.0 + $ hub install mobilenet_v2_animals==1.1.0 ``` diff --git a/modules/image/classification/mobilenet_v2_animals/data_feed.py b/modules/image/classification/mobilenet_v2_animals/data_feed.py index 99a0855fd..80f0d7f05 100644 --- a/modules/image/classification/mobilenet_v2_animals/data_feed.py +++ b/modules/image/classification/mobilenet_v2_animals/data_feed.py @@ -3,7 +3,6 @@ import time from collections import OrderedDict -import cv2 import numpy as np from PIL import Image diff --git a/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py b/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py deleted file mode 100644 index 7186d5802..000000000 --- a/modules/image/classification/mobilenet_v2_animals/mobilenet_v2.py +++ /dev/null @@ -1,182 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['MobileNetV2'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class MobileNetV2(): - def __init__(self): - self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - #conv1 - input = self.conv_bn_layer( - input, num_filters=int(32 * scale), filter_size=3, stride=2, padding=1, if_act=True, name='conv1_1') - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input = self.invresi_blocks(input=input, in_c=in_c, t=t, c=int(c * scale), n=n, s=s, name='conv' + str(i)) - in_c = int(c * scale) - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d(input=input, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True) - - output = fluid.layers.fc( - input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - 
bias_attr=ParamAttr(name='fc10_offset')) - return output, input - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out - else: - return linear_out - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block diff --git a/modules/image/classification/mobilenet_v2_animals/module.py b/modules/image/classification/mobilenet_v2_animals/module.py index c691a41e9..2dc071703 100644 --- a/modules/image/classification/mobilenet_v2_animals/module.py +++ b/modules/image/classification/mobilenet_v2_animals/module.py @@ -7,15 +7,13 @@ import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from mobilenet_v2_animals.processor import postprocess, base64_to_cv2 -from mobilenet_v2_animals.data_feed import reader -from mobilenet_v2_animals.mobilenet_v2 import MobileNetV2 +from .processor import postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( @@ -25,10 +23,10 @@ author_email="", summary= "Mobilenet_V2 is a image classfication 
model, this module is trained with Baidu's self-built animals dataset.", - version="1.0.0") -class MobileNetV2Animals(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "model") + version="1.1.0") +class MobileNetV2Animals: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "model", "model") label_file = os.path.join(self.directory, "label_list.txt") with open(label_file, 'r', encoding='utf-8') as file: self.label_list = file.read().split("\n")[:-1] @@ -52,10 +50,12 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -64,58 +64,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool) : Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding vaule is image tensor. - outputs (dict): key is : - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. 
- """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") - mobile_net = MobileNetV2() - output, feature_map = mobile_net.net(input=image, class_dim=len(self.label_list), scale=1.0) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = {key: global_vars[value] for key, value in inputs.items()} - outputs = {key: global_vars[value] for key, value in outputs.items()} - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - return b - - fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog + self.gpu_predictor = create_predictor(gpu_config) def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): """ @@ -158,32 +110,18 @@ def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, t pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( - [batch_image]) - out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) res += out return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/classification/mobilenet_v2_animals/processor.py b/modules/image/classification/mobilenet_v2_animals/processor.py index 6dc49772f..ede6f0993 100644 --- a/modules/image/classification/mobilenet_v2_animals/processor.py +++ b/modules/image/classification/mobilenet_v2_animals/processor.py @@ -5,7 +5,6 @@ import base64 import cv2 -import os import numpy as np diff --git 
a/modules/image/classification/mobilenet_v2_animals/test.py b/modules/image/classification/mobilenet_v2_animals/test.py new file mode 100644 index 000000000..587dee2ee --- /dev/null +++ b/modules/image/classification/mobilenet_v2_animals/test.py @@ -0,0 +1,76 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="mobilenet_v2_animals") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification( + paths=['tests/test.jpg'] + ) + data = results[0] + self.assertTrue('威尔士柯基' in data) + self.assertTrue(data['威尔士柯基'] > 0.5) + + def test_classification2(self): + results = self.module.classification( + images=[cv2.imread('tests/test.jpg')] + ) + data = results[0] + self.assertTrue('威尔士柯基' in data) + self.assertTrue(data['威尔士柯基'] > 0.5) + + def test_classification3(self): + results = self.module.classification( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True + ) + data = results[0] + self.assertTrue('威尔士柯基' in data) + self.assertTrue(data['威尔士柯基'] > 0.5) + + def test_classification4(self): + self.assertRaises( + AssertionError, + self.module.classification, + paths=['no.jpg'] + ) + + def test_classification5(self): + self.assertRaises( + TypeError, + self.module.classification, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 94949b0e9120b4bca5888a4d19ff9759a05dd54f Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:08:57 +0800 Subject: [PATCH 095/117] update mobilenet_v2_dishes (#2018) --- .../mobilenet_v2_dishes/README.md | 6 +- .../mobilenet_v2_dishes/README_en.md | 6 +- .../mobilenet_v2_dishes/data_feed.py | 7 +- .../mobilenet_v2_dishes/mobilenet_v2.py | 182 ------------------ .../mobilenet_v2_dishes/module.py | 158 ++++++--------- .../mobilenet_v2_dishes/processor.py | 1 - .../mobilenet_v2_dishes/test.py | 76 ++++++++ 7 files changed, 151 insertions(+), 285 deletions(-) delete mode 100644 modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py create mode 100644 modules/image/classification/mobilenet_v2_dishes/test.py diff --git a/modules/image/classification/mobilenet_v2_dishes/README.md b/modules/image/classification/mobilenet_v2_dishes/README.md index aad927459..c9aa28bea 100644 --- a/modules/image/classification/mobilenet_v2_dishes/README.md +++ b/modules/image/classification/mobilenet_v2_dishes/README.md @@ -134,6 +134,10 @@ 初始发布 +* 1.1.0 + + 移除 Fluid API + - ```shell - $ hub install mobilenet_v2_dishes==1.0.0 + $ hub install mobilenet_v2_dishes==1.1.0 ``` diff --git a/modules/image/classification/mobilenet_v2_dishes/README_en.md b/modules/image/classification/mobilenet_v2_dishes/README_en.md index 
70d3f41a6..1e753eddf 100644 --- a/modules/image/classification/mobilenet_v2_dishes/README_en.md +++ b/modules/image/classification/mobilenet_v2_dishes/README_en.md @@ -133,6 +133,10 @@ First release +* 1.1.0 + + Remove Fluid API + - ```shell - $ hub install mobilenet_v2_dishes==1.0.0 + $ hub install mobilenet_v2_dishes==1.1.0 ``` diff --git a/modules/image/classification/mobilenet_v2_dishes/data_feed.py b/modules/image/classification/mobilenet_v2_dishes/data_feed.py index 99a0855fd..03c740d0b 100644 --- a/modules/image/classification/mobilenet_v2_dishes/data_feed.py +++ b/modules/image/classification/mobilenet_v2_dishes/data_feed.py @@ -3,7 +3,6 @@ import time from collections import OrderedDict -import cv2 import numpy as np from PIL import Image @@ -63,7 +62,8 @@ def reader(images=None, paths=None): if paths: for im_path in paths: each = OrderedDict() - assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) each['org_im_path'] = im_path each['org_im'] = Image.open(im_path) each['org_im_width'], each['org_im_height'] = each['org_im'].size @@ -73,7 +73,8 @@ def reader(images=None, paths=None): for im in images: each = OrderedDict() each['org_im'] = Image.fromarray(im[:, :, ::-1]) - each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) each['org_im_width'], each['org_im_height'] = each['org_im'].size component.append(each) diff --git a/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py b/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py deleted file mode 100644 index 7186d5802..000000000 --- a/modules/image/classification/mobilenet_v2_dishes/mobilenet_v2.py +++ /dev/null @@ -1,182 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - -__all__ = ['MobileNetV2'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class MobileNetV2(): - def __init__(self): - self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - - #conv1 - input = self.conv_bn_layer( - input, num_filters=int(32 * scale), filter_size=3, stride=2, padding=1, if_act=True, name='conv1_1') - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input = self.invresi_blocks(input=input, in_c=in_c, t=t, c=int(c * scale), n=n, s=s, name='conv' + str(i)) - in_c = int(c * scale) - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d(input=input, pool_size=7, pool_stride=1, pool_type='avg', global_pooling=True) - - output = fluid.layers.fc( - input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - 
bias_attr=ParamAttr(name='fc10_offset')) - return output, input - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out - else: - return linear_out - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block diff --git a/modules/image/classification/mobilenet_v2_dishes/module.py b/modules/image/classification/mobilenet_v2_dishes/module.py index d0b6ed8b8..deddd966a 100644 --- a/modules/image/classification/mobilenet_v2_dishes/module.py +++ b/modules/image/classification/mobilenet_v2_dishes/module.py @@ -7,15 +7,11 @@ import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from paddlehub.common.paddle_helper import add_vars_prefix -from mobilenet_v2_dishes.processor import postprocess, base64_to_cv2 -from mobilenet_v2_dishes.data_feed import reader -from mobilenet_v2_dishes.mobilenet_v2 import MobileNetV2 +from .processor import postprocess, base64_to_cv2 +from .data_feed import reader @moduleinfo( @@ -24,11 +20,12 @@ author="baidu-vis", author_email="", summary= - "Mobilenet_V2 is a image classfication model, this module is 
trained with Baidu's self-built dishes dataset.", - version="1.0.0") -class MobileNetV2Dishes(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "model") + "Mobilenet_V2 is a image classfication model, this module is trained with Baidu self-built dishes dataset.", + version="1.1.0") +class MobileNetV2Dishes: + def __init__(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "model", "model") label_file = os.path.join(self.directory, "label_list.txt") with open(label_file, 'r', encoding='utf-8') as file: self.label_list = file.read().split("\n")[:-1] @@ -52,10 +49,12 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -64,60 +63,18 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, trainable=True, pretrained=True): - """context for transfer learning. - - Args: - trainable (bool): Set parameters in program to be trainable. - pretrained (bool) : Whether to load pretrained model. - - Returns: - inputs (dict): key is 'image', corresponding vaule is image tensor. - outputs (dict): key is : - 'classification', corresponding value is the result of classification. - 'feature_map', corresponding value is the result of the layer before the fully connected layer. - context_prog (fluid.Program): program for transfer learning. 
- """ - context_prog = fluid.Program() - startup_prog = fluid.Program() - with fluid.program_guard(context_prog, startup_prog): - with fluid.unique_name.guard(): - image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32") - mobile_net = MobileNetV2() - output, feature_map = mobile_net.net(input=image, class_dim=len(self.label_list), scale=1.0) - - name_prefix = '@HUB_{}@'.format(self.name) - inputs = {'image': name_prefix + image.name} - outputs = {'classification': name_prefix + output.name, 'feature_map': name_prefix + feature_map.name} - add_vars_prefix(context_prog, name_prefix) - add_vars_prefix(startup_prog, name_prefix) - global_vars = context_prog.global_block().vars - inputs = {key: global_vars[value] for key, value in inputs.items()} - outputs = {key: global_vars[value] for key, value in outputs.items()} - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - # pretrained - if pretrained: - - def _if_exist(var): - b = os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - return b - - fluid.io.load_vars(exe, self.default_pretrained_model_path, context_prog, predicate=_if_exist) - else: - exe.run(startup_prog) - # trainable - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - return inputs, outputs, context_prog - - def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, + images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): """ API for image classification. @@ -158,32 +115,21 @@ def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, t pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( - [batch_image]) - out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + out = postprocess( + data_out=output_handle.copy_to_cpu(), + label_list=self.label_list, + top_k=top_k) res += out return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ @@ -203,13 +149,19 @@ def run_cmd(self, argvs): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group(title="Input 
options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", description="Run configuration for controlling module behavior, not required.") + title="Config options", + description= + "Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + results = self.classification( + paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu) return results def add_module_config_arg(self): @@ -217,12 +169,24 @@ def add_module_config_arg(self): Add the command config options. """ self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") - self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") - self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument( + '--batch_size', + type=ast.literal_eval, + default=1, + help="batch size.") + self.arg_config_group.add_argument( + '--top_k', + type=ast.literal_eval, + default=1, + help="Return top k results.") def add_module_input_arg(self): """ Add the command input options. """ - self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument( + '--input_path', type=str, help="path to image.") diff --git a/modules/image/classification/mobilenet_v2_dishes/processor.py b/modules/image/classification/mobilenet_v2_dishes/processor.py index 6dc49772f..ede6f0993 100644 --- a/modules/image/classification/mobilenet_v2_dishes/processor.py +++ b/modules/image/classification/mobilenet_v2_dishes/processor.py @@ -5,7 +5,6 @@ import base64 import cv2 -import os import numpy as np diff --git a/modules/image/classification/mobilenet_v2_dishes/test.py b/modules/image/classification/mobilenet_v2_dishes/test.py new file mode 100644 index 000000000..664a2cbc2 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_dishes/test.py @@ -0,0 +1,76 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/rAyCBQTH7ws/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYzMTIzODM5&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="mobilenet_v2_dishes") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification( + paths=['tests/test.jpg'] + ) + data = results[0] + self.assertTrue('海鲜面' in data) + self.assertTrue(data['海鲜面'] > 0.01) + + def test_classification2(self): + results = self.module.classification( + images=[cv2.imread('tests/test.jpg')] + ) + data = results[0] + self.assertTrue('海鲜面' in data) + self.assertTrue(data['海鲜面'] > 0.01) + + def test_classification3(self): + results = self.module.classification( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True + ) + data = results[0] + self.assertTrue('海鲜面' in data) + self.assertTrue(data['海鲜面'] > 0.01) + + def test_classification4(self): + self.assertRaises( + AssertionError, + self.module.classification, + paths=['no.jpg'] + ) + + def test_classification5(self): + self.assertRaises( + TypeError, + self.module.classification, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 000473594a0d7c7d27795d017abe961902251869 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:10:13 +0800 Subject: [PATCH 096/117] update ace2p (#2003) * update ace2p * add clean func * update ace2p --- .../semantic_segmentation/ace2p/README.md | 18 ++-- .../semantic_segmentation/ace2p/README_en.md | 18 ++-- .../semantic_segmentation/ace2p/data_feed.py | 4 +- .../semantic_segmentation/ace2p/module.py | 62 ++++++------- .../image/semantic_segmentation/ace2p/test.py | 93 +++++++++++++++++++ 5 files changed, 142 insertions(+), 53 deletions(-) create mode 100644 modules/image/semantic_segmentation/ace2p/test.py diff --git a/modules/image/semantic_segmentation/ace2p/README.md b/modules/image/semantic_segmentation/ace2p/README.md index 12b23cf4f..5677f51e7 100644 --- a/modules/image/semantic_segmentation/ace2p/README.md +++ b/modules/image/semantic_segmentation/ace2p/README.md @@ -99,20 +99,14 @@ * data (numpy.ndarray): 图像分割得到的结果,shape 为`H * W`,元素的取值为0-19,表示每个像素的分类结果,映射顺序与下面的调色板相同。 ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中。 + * dirname: 模型保存路径 ## 四、服务部署 @@ -176,3 +170,11 @@ * 1.1.0 适配paddlehub2.0版本 + +* 1.2.0 + + 移除 Fluid API + + ```shell + $ hub install ace2p == 1.2.0 + ``` \ No newline at end of file diff --git a/modules/image/semantic_segmentation/ace2p/README_en.md b/modules/image/semantic_segmentation/ace2p/README_en.md index 3fa0c273e..e8acf04f2 100644 --- a/modules/image/semantic_segmentation/ace2p/README_en.md +++ b/modules/image/semantic_segmentation/ace2p/README_en.md @@ -105,19 +105,13 @@ - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save the model to the specified path. - **Parameters** - * dirname: Save path. 
- * model\_filename: mMdel file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. ## IV. Server Deployment @@ -182,3 +176,11 @@ * 1.1.0 Adapt to paddlehub2.0 + +* 1.2.0 + + Remove Fluid API + + ```shell + $ hub install ace2p == 1.2.0 + ``` diff --git a/modules/image/semantic_segmentation/ace2p/data_feed.py b/modules/image/semantic_segmentation/ace2p/data_feed.py index 390946548..230520253 100644 --- a/modules/image/semantic_segmentation/ace2p/data_feed.py +++ b/modules/image/semantic_segmentation/ace2p/data_feed.py @@ -6,7 +6,7 @@ import cv2 import numpy as np -from ace2p.processor import get_direction, get_3rd_point, get_affine_transform +from .processor import get_affine_transform __all__ = ['reader'] @@ -45,7 +45,7 @@ def preprocess(org_im, scale, rotation): img_mean = np.array([0.406, 0.456, 0.485]).reshape((1, 1, 3)) img_std = np.array([0.225, 0.224, 0.229]).reshape((1, 1, 3)) - image = image.astype(np.float) + image = image.astype(np.float32) image = (image / 255.0 - img_mean) / img_std image = image.transpose(2, 0, 1).astype(np.float32) diff --git a/modules/image/semantic_segmentation/ace2p/module.py b/modules/image/semantic_segmentation/ace2p/module.py index 458f33d10..2f30df4d7 100644 --- a/modules/image/semantic_segmentation/ace2p/module.py +++ b/modules/image/semantic_segmentation/ace2p/module.py @@ -7,13 +7,14 @@ import os import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +import paddle +import paddle.jit +import paddle.static +from paddle.inference import Config, create_predictor from paddlehub.module.module import moduleinfo, runnable, serving -from ace2p.processor import get_palette, postprocess, base64_to_cv2, cv2_to_base64 -from ace2p.data_feed import reader +from .processor import get_palette, postprocess, base64_to_cv2, cv2_to_base64 +from .data_feed import reader @moduleinfo( @@ -22,10 +23,11 @@ author="baidu-idl", author_email="", summary="ACE2P is an image segmentation model for human parsing solution.", - version="1.1.0") -class ACE2P(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "ace2p_human_parsing") + version="1.2.0") +class ACE2P: + def __init__(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "ace2p_human_parsing", "model") # label list label_list_file = os.path.join(self.directory, 'label_list.txt') with open(label_list_file, "r") as file: @@ -39,10 +41,12 @@ def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] @@ -51,10 +55,10 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + 
self.gpu_predictor = create_predictor(gpu_config) def segmentation(self, images=None, @@ -114,12 +118,19 @@ def segmentation(self, pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.astype('float32')) - data_out = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run([batch_image]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.astype('float32')) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + # postprocess one by one for i in range(len(batch_data)): out = postprocess( - data_out=data_out[0].as_ndarray()[i], + data_out=output_handle.copy_to_cpu()[i], org_im=batch_data[i]['org_im'], org_im_path=batch_data[i]['org_im_path'], image_info=batch_data[i]['image_info'], @@ -129,25 +140,6 @@ def segmentation(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/semantic_segmentation/ace2p/test.py b/modules/image/semantic_segmentation/ace2p/test.py new file mode 100644 index 000000000..fa738eb71 --- /dev/null +++ b/modules/image/semantic_segmentation/ace2p/test.py @@ -0,0 +1,93 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/pg_WCHWSdT8/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyNDM2ODI4&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
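+        # save the image to tests/test.jpg and also build a short tests/test.avi from repeated frames of it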
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') + img = cv2.imread('tests/test.jpg') + video = cv2.VideoWriter('tests/test.avi', fourcc, + 20.0, tuple(img.shape[:2])) + for i in range(40): + video.write(img) + video.release() + cls.module = hub.Module(name="ace2p") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('ace2p_output') + + def test_segmentation1(self): + results = self.module.segmentation( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation2(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation3(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation4(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation5(self): + self.assertRaises( + AssertionError, + self.module.segmentation, + paths=['no.jpg'] + ) + + def test_segmentation6(self): + self.assertRaises( + AttributeError, + self.module.segmentation, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 71ee4cf637bc297be4903e41ed344f8452418818 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:43:10 +0800 Subject: [PATCH 097/117] update deeplabv3p_xception65_humanseg (#2008) * update deeplabv3p_xception65_humanseg * update save inference model --- .../deeplabv3p_xception65_humanseg/README.md | 24 ++---- .../README_en.md | 24 ++---- .../data_feed.py | 1 - .../deeplabv3p_xception65_humanseg/module.py | 41 +++------ .../processor.py | 1 - .../deeplabv3p_xception65_humanseg/test.py | 86 +++++++++++++++++++ 6 files changed, 116 insertions(+), 61 deletions(-) create mode 100644 modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/test.py diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md index ae623197e..835dd3fdf 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md @@ -70,11 +70,11 @@ ```python def segmentation(images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_output') + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_output') ``` - 预测API,用于人像分割。 @@ -95,20 +95,14 @@ * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - 将模型保存到指定路径。 - **参数** - * dirname: 存在模型的目录名称 - * model\_filename: 模型文件名称,默认为\_\_model\_\_ - * 
params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) - * combined: 是否将参数保存到统一的一个文件中 + * dirname: 模型保存路径 ## 四、服务部署 @@ -175,10 +169,10 @@ 修复预测后处理图像数据超过[0,255]范围 -* 1.1.2 +* 1.2.0 移除 fluid api - ```shell - $ hub install deeplabv3p_xception65_humanseg==1.1.2 + $ hub install deeplabv3p_xception65_humanseg==1.2.0 ``` diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md index 8e090c7f7..4bf80baad 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README_en.md @@ -70,11 +70,11 @@ - ```python def segmentation(images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_output') + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_output') ``` - Prediction API, generating segmentation result. @@ -94,19 +94,13 @@ * data (numpy.ndarray): The result of portrait segmentation. - ```python - def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) + def save_inference_model(dirname) ``` - Save the model to the specified path. - **Parameters** - * dirname: Save path. - * model\_filename: Model file name,defalt is \_\_model\_\_ - * params\_filename: Parameter file name,defalt is \_\_params\_\_(Only takes effect when `combined` is True) - * combined: Whether to save the parameters to a unified file. + * dirname: Model save path. ## IV. Server Deployment @@ -171,10 +165,10 @@ Fix the bug of image value out of range -* 1.1.2 +* 1.2.0 Remove fluid api - ```shell - $ hub install deeplabv3p_xception65_humanseg==1.1.2 + $ hub install deeplabv3p_xception65_humanseg==1.2.0 ``` diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py index 6306b28f4..ec940aa0a 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/data_feed.py @@ -5,7 +5,6 @@ import cv2 import numpy as np -from PIL import Image __all__ = ['reader'] diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py index 5208b61ba..a39117ce7 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/module.py @@ -8,14 +8,13 @@ import numpy as np import paddle -from deeplabv3p_xception65_humanseg.data_feed import reader -from deeplabv3p_xception65_humanseg.processor import base64_to_cv2 -from deeplabv3p_xception65_humanseg.processor import cv2_to_base64 -from deeplabv3p_xception65_humanseg.processor import postprocess +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import cv2_to_base64 +from .processor import postprocess from paddle.inference import Config from paddle.inference import create_predictor -import paddlehub as hub from paddlehub.module.module import moduleinfo from paddlehub.module.module import runnable from paddlehub.module.module import serving @@ -26,18 +25,20 @@ author="baidu-vis", author_email="", summary="DeepLabv3+ is a semantic segmentation model.", - version="1.1.2") -class 
DeeplabV3pXception65HumanSeg(hub.Module): + version="1.2.0") +class DeeplabV3pXception65HumanSeg: - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "deeplabv3p_xception65_humanseg_model") + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "deeplabv3p_xception65_humanseg_model", "model") self._set_config() def _set_config(self): """ predictor config setting """ - cpu_config = Config(self.default_pretrained_model_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() self.cpu_predictor = create_predictor(cpu_config) @@ -49,7 +50,7 @@ def _set_config(self): except: use_gpu = False if use_gpu: - gpu_config = Config(self.default_pretrained_model_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) self.gpu_predictor = create_predictor(gpu_config) @@ -134,24 +135,6 @@ def segmentation(self, res.append(out) return res - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = paddle.CPUPlace() - exe = paddle.Executor(place) - - program, feeded_var_names, target_vars = paddle.static.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - paddle.static.save_inference_model(dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @serving def serving_method(self, images, **kwargs): """ diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py index ca456d9bf..377aa4d73 100644 --- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/processor.py @@ -5,7 +5,6 @@ import os import time -from collections import OrderedDict import base64 import cv2 diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/test.py b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/test.py new file mode 100644 index 000000000..19b4fd048 --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/test.py @@ -0,0 +1,86 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/pg_WCHWSdT8/download?ixid=MnwxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNjYyNDM2ODI4&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
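+        # persist the downloaded image to tests/test.jpg for the human segmentation test cases below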
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="deeplabv3p_xception65_humanseg") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('humanseg_output') + + def test_segmentation1(self): + results = self.module.segmentation( + paths=['tests/test.jpg'], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation2(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation3(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=False, + visualization=True + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation4(self): + results = self.module.segmentation( + images=[cv2.imread('tests/test.jpg')], + use_gpu=True, + visualization=False + ) + self.assertIsInstance(results[0]['data'], np.ndarray) + + def test_segmentation5(self): + self.assertRaises( + AssertionError, + self.module.segmentation, + paths=['no.jpg'] + ) + + def test_segmentation6(self): + self.assertRaises( + AttributeError, + self.module.segmentation, + images=['test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 2ce0e07b0effaca1e70853870098224d617e26c5 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:45:59 +0800 Subject: [PATCH 098/117] update solov2 (#2015) * update solov2 * fix typo --- .../instance_segmentation/solov2/README.md | 12 +- .../instance_segmentation/solov2/data_feed.py | 108 ++++++++++++------ .../instance_segmentation/solov2/example.png | Bin 459508 -> 0 bytes .../instance_segmentation/solov2/module.py | 69 ++++++----- .../instance_segmentation/solov2/processor.py | 18 ++- .../instance_segmentation/solov2/test.py | 96 ++++++++++++++++ 6 files changed, 228 insertions(+), 75 deletions(-) delete mode 100644 modules/image/instance_segmentation/solov2/example.png create mode 100644 modules/image/instance_segmentation/solov2/test.py diff --git a/modules/image/instance_segmentation/solov2/README.md b/modules/image/instance_segmentation/solov2/README.md index a9ca8e14e..16e1b3d24 100644 --- a/modules/image/instance_segmentation/solov2/README.md +++ b/modules/image/instance_segmentation/solov2/README.md @@ -78,7 +78,7 @@ - res (dict): 识别结果,关键字有 'segm', 'label', 'score'对应的取值为: - segm (np.ndarray): 实例分割结果,取值为0或1。0表示背景,1为实例; - label (list): 实例分割结果类别id; - - score (list):实例分割结果类别得分;s + - score (list):实例分割结果类别得分; ## 四、服务部署 @@ -147,8 +147,10 @@ 初始发布 - * ```shell - $ hub install hand_pose_localization==1.0.0 - ``` +* 1.1.0 - + 适配 PaddlePaddle 2.2.0+ + + * ```shell + $ hub install hand_pose_localization==1.1.0 + ``` \ No newline at end of file diff --git a/modules/image/instance_segmentation/solov2/data_feed.py b/modules/image/instance_segmentation/solov2/data_feed.py index 7d34afe85..1baaefafd 100644 --- a/modules/image/instance_segmentation/solov2/data_feed.py +++ b/modules/image/instance_segmentation/solov2/data_feed.py @@ -3,8 +3,8 @@ import cv2 import numpy as np +from paddle.inference import Config, create_predictor, PrecisionType from PIL import 
Image, ImageDraw
-import paddle.fluid as fluid
 
 
 def create_inputs(im, im_info):
@@ -19,11 +19,14 @@ def create_inputs(im, im_info):
     inputs['image'] = im
     origin_shape = list(im_info['origin_shape'])
     resize_shape = list(im_info['resize_shape'])
-    pad_shape = list(im_info['pad_shape']) if im_info['pad_shape'] is not None else list(im_info['resize_shape'])
+    pad_shape = list(im_info['pad_shape']) if im_info[
+        'pad_shape'] is not None else list(im_info['resize_shape'])
     scale_x, scale_y = im_info['scale']
     scale = scale_x
     im_info = np.array([resize_shape + [scale]]).astype('float32')
     inputs['im_info'] = im_info
+    inputs['scale_factor'] = np.array([scale_x, scale_x]).astype('float32').reshape(-1, 2)
+    inputs['im_shape'] = np.array(resize_shape).astype('float32').reshape(-1, 2)
     return inputs
 
 
@@ -42,28 +45,38 @@ def visualize_box_mask(im, results, labels=None, mask_resolution=14, threshold=0
         im (PIL.Image.Image): visualized image
     """
     if not labels:
-        labels = [
-            'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
-            'traffic light', 'fire', 'hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
-            'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
-            'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
-            'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
-            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
-            'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
-            'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
-            'teddy bear', 'hair drier', 'toothbrush'
-        ]
+        labels = ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+                  'train', 'truck', 'boat', 'traffic light', 'fire', 'hydrant', 'stop sign', 'parking meter',
+                  'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
+                  'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+                  'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
+                  'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
+                  'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+                  'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
+                  'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+                  'hair drier', 'toothbrush']
     if isinstance(im, str):
         im = Image.open(im).convert('RGB')
     else:
         im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
         im = Image.fromarray(im)
     if 'masks' in results and 'boxes' in results:
-        im = draw_mask(im, results['boxes'], results['masks'], labels, resolution=mask_resolution)
+        im = draw_mask(
+            im,
+            results['boxes'],
+            results['masks'],
+            labels,
+            resolution=mask_resolution)
     if 'boxes' in results:
         im = draw_box(im, results['boxes'], labels)
     if 'segm' in results:
-        im = draw_segm(im, results['segm'], results['label'], results['score'], labels, threshold=threshold)
+        im = draw_segm(
+            im,
+            results['segm'],
+            results['label'],
+            results['score'],
+            labels,
+            threshold=threshold)
     return im
 
 
@@ -152,7 +165,8 @@ def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5):
         y0 = min(max(ymin, 0), im_h)
         y1 = min(max(ymax + 1, 0), im_h)
         im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
-        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (x0 - xmin):(x1 - xmin)]
+        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
+            x0 - xmin):(x1 - xmin)]
         if clsid not in clsid2color:
             clsid2color[clsid] = color_list[clsid]
         color_mask = clsid2color[clsid]
@@ -190,19 +204,28 @@ def draw_box(im, np_boxes, labels):
         color = tuple(clsid2color[clsid])
 
         # draw bbox
-        draw.line([(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)],
-                  width=draw_thickness,
-                  fill=color)
+        draw.line(
+            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+             (xmin, ymin)],
+            width=draw_thickness,
+            fill=color)
 
         # draw label
         text = "{} {:.4f}".format(labels[clsid], score)
         tw, th = draw.textsize(text)
-        draw.rectangle([(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
+        draw.rectangle(
+            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
         draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
     return im
 
 
-def draw_segm(im, np_segms, np_label, np_score, labels, threshold=0.5, alpha=0.7):
+def draw_segm(im,
+              np_segms,
+              np_label,
+              np_score,
+              labels,
+              threshold=0.5,
+              alpha=0.7):
     """
     Draw segmentation on image.
     """
@@ -231,17 +254,28 @@ def draw_segm(im, np_segms, np_label, np_score, labels, threshold=0.5, alpha=0.7
         sum_y = np.sum(mask, axis=1)
         y = np.where(sum_y > 0.5)[0]
         x0, x1, y0, y1 = x[0], x[-1], y[0], y[-1]
-        cv2.rectangle(im, (x0, y0), (x1, y1), tuple(color_mask.astype('int32').tolist()), 1)
+        cv2.rectangle(im, (x0, y0), (x1, y1),
+                      tuple(color_mask.astype('int32').tolist()), 1)
         bbox_text = '%s %.2f' % (labels[clsid], score)
         t_size = cv2.getTextSize(bbox_text, 0, 0.3, thickness=1)[0]
-        cv2.rectangle(im, (x0, y0), (x0 + t_size[0], y0 - t_size[1] - 3), tuple(color_mask.astype('int32').tolist()),
-                      -1)
-        cv2.putText(im, bbox_text, (x0, y0 - 2), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1, lineType=cv2.LINE_AA)
-
+        cv2.rectangle(im, (x0, y0), (x0 + t_size[0], y0 - t_size[1] - 3),
+                      tuple(color_mask.astype('int32').tolist()), -1)
+        cv2.putText(
+            im,
+            bbox_text, (x0, y0 - 2),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.3, (0, 0, 0),
+            1,
+            lineType=cv2.LINE_AA)
+
     return Image.fromarray(im.astype('uint8'))
 
 
-def load_predictor(model_dir, run_mode='fluid', batch_size=1, use_gpu=False, min_subgraph_size=3):
+def load_predictor(model_dir,
+                   run_mode='paddle',
+                   batch_size=1,
+                   use_gpu=False,
+                   min_subgraph_size=3):
     """set AnalysisConfig, generate AnalysisPredictor
     Args:
         model_dir (str): root path of __model__ and __params__
@@ -251,17 +285,19 @@ def load_predictor(model_dir, run_mode='fluid', batch_size=1, use_gpu=False, min
     Raises:
         ValueError: predict by TensorRT need use_gpu == True.
     """
-    if not use_gpu and not run_mode == 'fluid':
-        raise ValueError("Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}".format(
-            run_mode, use_gpu))
+    if not use_gpu and not run_mode == 'paddle':
+        raise ValueError(
+            "Predict by TensorRT mode: {}, expect use_gpu==True, but use_gpu == {}"
+            .format(run_mode, use_gpu))
     if run_mode == 'trt_int8':
-        raise ValueError("TensorRT int8 mode is not supported now, " "please use trt_fp32 or trt_fp16 instead.")
+        raise ValueError("TensorRT int8 mode is not supported now, "
+                         "please use trt_fp32 or trt_fp16 instead.")
     precision_map = {
-        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
-        'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
-        'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
+        'trt_int8': PrecisionType.Int8,
+        'trt_fp32': PrecisionType.Float32,
+        'trt_fp16': PrecisionType.Half
     }
-    config = fluid.core.AnalysisConfig(os.path.join(model_dir, '__model__'), os.path.join(model_dir, '__params__'))
+    config = Config(model_dir+'.pdmodel', model_dir+'.pdiparams')
     if use_gpu:
         # initial GPU memory(M), device ID
         config.enable_use_gpu(100, 0)
@@ -285,7 +321,7 @@ def load_predictor(model_dir, run_mode='fluid', batch_size=1, use_gpu=False, min
     config.enable_memory_optim()
     # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
-    predictor = fluid.core.create_paddle_predictor(config)
+    predictor = create_predictor(config)
     return predictor
 
 
diff --git a/modules/image/instance_segmentation/solov2/example.png b/modules/image/instance_segmentation/solov2/example.png
deleted file mode 100644
index 4ece0a2df78452484829ce4e0fafd02341c5a3e5..0000000000000000000000000000000000000000
GIT binary patch
[binary image data for the deleted example.png omitted]
zC~B+UTIX~){`h%-Sn;z90~_K-XbV*5?C#EG)(DTBTF{)n!SM)|EpX+^CgJ0fbjACA zv0SqY(!7D9rPw}3CopjAN)v@2E$HL7@!M*5Jo#rgW922@P@PF9D`7);L&_rkib^*`QEan1kS>YKq-* zxNQDIyp@$qG3Fh&dj5H${KAikACcX&SoZ;i2?N_IBrQj%_~&-R8AotcP`<4J)!C{4 z*U{ECk4xDI&ea6o9XEES1#W4dk$^GNvGvp!x8DRr3S*B3xXQj_li9@t!0So)*w-!;}JizcU%PZj|H zQ(j)43nO^Cd^A^~+#7ZDd#OX}a+Wy4?Ov;XwyCkgORU=43-F-^H|1+|2^09cSCz%_ zApO8fpGluX|BB7@O|c|o?z#nQby+}s*Y_JdkNt{D=Bdw9FKy#jh#LX4{`rzBSf zMj{L_2?(4oe?(92#^vz~|B2iJ!71I0Fm|;bONa9*n!KrS0i~Rl%A=lmNd;6|*co#%RL|8*#)+pWTgkShnHFSpj7XYqe;=u^5#Yceys{vJ$FG*rypj&XT zvKlPt{*b&AJzT1%+2dG;+AuGFu6kdZ)3^%^08?O1>);sYk|BBJ5^Cn!)vKJYqGHzO zXA|Cn8h$<&lLpZ3!FS^ihW5ZBg-1CuFfvjP8YBvCx>n$0;^YLuS7aQ1XQ#@K0JL>-7pPwL_vjWyQF;ruZ@$rdY#G<=aBg<@CrQl{|%f%$Rg!xU=3S)XEB#j{cWT1B>k?tb0;#ZzOI@vhSXqJ!13}~@qAa~2oSYZ`dA2hY~mZ=t{Q=zaqTyXfcqOprWxWErT~Z=uUK|E4?M zJ^Rudgs5KyG9m%NKc-U&H6x%e$@v&kyBb7N^t4=+UYVwaFo6H){t>R@t;}7Qb&hD~ zigPv_WSsu~)JNC3w)wQ}FZ+DUDT@qLs-0nx?nyVpz0`&D^XmUKHqRy$~l}DR`yPGI%iS(`$sZs22*`@|ec?OY}1WsAg{U z{~{H2&aCZbo^Fo-6Tn{(X^aEC#m#Z@>GTpB~qb^WYO*UI!`TKOa)j6d{Vv8sOhg;Aht+?cSw-!R|nXYUZaJzxnV%Q){(@ zz)5Ke;)?bas@W2e!Z(G{L2UUzEp@zh9G%_f!;Yg(z_Kxk4cTSEkA7-G6aNhpAHiN= zFVg$o;=7onO;v7F}{0ortc|p`JehnCiq-;(`Jn6S?k+kRp()` z5}xTSwt=FHQLK;HQ0XMi&RT9}hC9ozC&h+PlRG&B-=mKf zM{~Ty6qJ>>VajO0HFBykYLMpj8(fR-l+clLxY!~Oe+)i{yH%OKc}tO|qWKr}**qeIf@;U(C{XMdK8Hc7s&m>`>7YctcWJJ(!5z| zZZLo*MmEc5G3mzVCK^DXv@RR-qc8Px967ZHIhoKpJuhI7CLUT`PP(p_CSo<%6qcMi zRJmFpte>5eQ{gavLH7(pbtM-hO2){wQZZHRyP|Z#!3Tj zZi^bRX*_FGXDlpLK$N16dMeL=pFgB8h9lz^jRfe0_&H40N3AuxU?A#*HzA_(nW=WZ zI-@q!#E(q4%q|TL1^54ns1{y^{@tAcxZ#PtcBAjN3qdH_9rC?AYkie^smoUny^m*V^@>58DLOLVJKfu#5FPC_WUIY0wN+a)X z8=H0#@bU`DSFmeLwxAqxU=KfNAj_iNGwO-LWth#V;*agLkW%k4>GR|(iP8@DtS=x} zSKYjYIZOUMn{o8kr~$g!!&=-`&`vqj+!WM!>KyMCU<93Mt_ zR12u9W(FA___-IO3d+H`=83m4)V5mm)57M!RFp-l`QWA-8Jxh>g(&{@Hu7Zt9P@J7 zeW$Br=@-<{r!2vYqQm2%ia^n~&ZXtoLQx(WcL}_IU**bW+q@#>D8=U^B_^t@|E3x} zbC!i}{!MQL^9>Pmb+7OFMwkDe-5aU=m`&IfNa*qMrYpEk*%N?D_@nR#x7# z)+lXx3DRdkF}5zWn)-&R@ArvoM>Z1yBxpPKXSYf0>zQ)uk>2UXpP_Pl)SZnty=^wT z07ZMXqO_*!z^~Ppd(n^TL_OFd<2mtvJYpWDg5EeZs9l+dnL(p8ed2wL(lwK2tE!=X zFJ<_7LCVfF*MtltA!mT!*pik3sqJ8Gw^y<(^th9ggG?SOlndzDJ4qpR)J_ymJgon? 
z*8zL9%+8PD)hrO=2CpykMfxsH9w8TTMN=K*nHLTf_~e(%dF}k;&KKgERk`(ChRXd4 z*GTK%;hLt~-Me?hHI!y=qVn14LDYsb_z8dI1U8V`C4&&tVt($7xt%XAfRS*h<8;-q zc`roYI4eE+b1g3Nk&_HDvHx}DdBR0wu%~}x2px}bqT|KePxis8o!$sA`cOhEk$cootnWUwXB(qQgtDB@zmL$qyCX951 zd9>8;5V~B3Sdczo6?(l@au+O zQ!@ph?5?~f%0@EN)G11lU81iu7C`X|nrN6kIO2QifqTlc&CnLR8e22CYgZ%aW|Y)O z*!gN85^^NXbSw;*a^|l}<^^AV2cw*!3up=vZM%7>&MTb&Kq*kQuSy%&4vmBIrQUb5 zK)-NVGyff4N;6e^LI3rQ3ViS21EZ3Xh38IY{Pz0Ix*s(|852bjq8jGSKEJo3=9-w3 zve*(MeTRBHFZn&8%O;r^mukYw|}C%Jt(pjDTeqy73xwm zfv0h$bg)Uf^3w+phqjzW`8kL&ytGk&GDZpX%%C|~W`S3vI}x7GrR8(s?Ni9{V4^pl zM42%=ylXMjeiBUR=(zuP5r`V&caD2IqPBih?nUpc9w4_jl6n3mggoXq6l`o*>p42+ z@yWDw>kh}>07XD0LRLuSQJRGq|L|Y+32Cdjzgh_rm6XJV*I8FSbUf%wh-8#afIX5b z+BwUbK{pvw)6Bk1B01WsPWcVT76(71r=k1iE+b@4DIyve*hYi}V4G_{OX@^ThL@NL zpyluR!4P6&4b#9b*Wxps#(M+19hahp|L}5;WGe{gpnIIEr!TA^Ux!UnOZH?9)Pall zDVDW4>39=_7^vMW@GR5fRC(pcsB;`dE^$SE{hv7fNT449r7ozssQ2U!$wn6EwmAT2 z20pBNm5M+dkZlO9zg#0}WtH)1!Dk9vsPm-)^kT}rt0ORJu=j3?MTP={1yiB57@c%& zGk$%4Oat+b)$HC`NE1<14T|`WA?J) zW}u_0Jn1clRx>C$&%^rg;DtA32?TeWok}W6b`@n<+<3Y7bnQ=EeDEnCXLgTz)p=J= z_*`8=I83_RdSsmLFB}1s+@He?tR&@woATPUk=4_#S9hkZz$f2e2V0}zMV{9Gm(J1~qyo_U8&HR$GnfKuxW%3%@*{;{we znbrlK%WJZQ=Mf@TOM_00-n~sg(64owDZa{o08>}eN~eJ`sHxFwB=W)x6kd|JlHMu} zhYNhDb-dRrq451`*qXaVKtaoNay{&ox7`!*0K~vK-<^Yg1sMJYodGaLY3y@`;KAM@ z9Pa~BGw9kYwGg{iGU!!TE1+tvvh)2&_>!5?w)St|*aW5$rzDw&Gz{qPuS6l4iS)d1 zbK~X}Xe9Cm?6Hmr*u{AuXtI6C3*5)Qo(_m;@#enlvf3{Lv# zub>38K2BGIogfZ1?@)VsZV`Fj#3vB?ueQn11X=n|{Z{^O)?N68A92a>;F_luteHR2 zJd&~gzyNuH7B{Y1!z&Kc?N+*@ImQDRl$F=egVv%tZ}OzJ zE)6iTHT3qHa1z~>uANU4IX<=6@vLtt(~pKPVn&%|W|Hj2fAN$**Kz(|x`B!FYKqc<>7h5FKUss~mU8cF7c(^9s zw(E6^78>TN(y^6hTNe-=Htsqn``~2Y$Gsn+=^vf~q?qW_S4LLGPZMi2QWYs#{+17U{9`_*T*Q!rolGN#c6<{;%jM6Y?ilX6!gAc0}%j zpUQxQhYrL%H~S?WBq__v-knC|^MCJ44bCPdG4R|v{Pkx(QuevMvQi;+?c(PTVk$Ei zep2T4x%i459KGOFQt#f{^V3~H&=AhyiGLPye`jZMQZp6|$`^(xL++D=T{_=ESa2}$ z+s@ShfjOJ~VpWoqytkdcLWLTXk4@pCtu(!@ns#nlgrrmm5$-x}_o{jj z*?$*#8Q^eo#*gPS+GXT+ay36nY!eQGt`rsGaH|=v@S`=4LY4~!8jI3iFf*L?_cDBL zo}O0aU_M*t(-I(cNEcC~bt@XRBzy{X&Mi|`A;1@|R5ciEj{JIN)i>dWvW4(+re}ZU zBI}(|XuN#adsxx`Iz;|gDl4nSub?X#^)vp@M$T6RTedmF1%^WvpNGzdEA5UShyXSg z4fcTK7au<=Ju&h3_r}Kl>MAotC;2^yoBC#i)JA$B4*B?8e#01^*kcN4USKfZ%zW*D zGh{%>SC9%p{Kv5eq_N`6I5r5g^LQAyWsR`l;0JNGzS+(S;t=$wG;8l&YhH zSWWY*XSRal2l9_W7`<_-Tx7ie-(fVT>v~6+b!Lvf>nOn^Cu|ZRmvB<4Zh7UXjIdW5 z8$gs{^4wo`rlm$u$^q4Z(}pxA)D>z}6n^=ww8frVSsOR|_EpWwGtRy|_V3}6=OiKu zi$C-KEUYRt>b?5#=gL0~*r9XCt|757W3L!?-gL=_K}SO}i?acu&Sz+c*U_--CtrS< zy3it(^C)<=b>Ul185-HOGswO0w(om$GgUUI+wqRoC7tkzk2X9Ob-d%=g*GYQ1qVDl zQsC7)UKyqQU$7Q^AMBSx2$mnY)D%3m)xNRoPB;Y9@!heq+75ST*OBfYZi>u)TKq6o z8Xt5eqyps&-J5s#pOX}N6neEL#2-uGmkj*e z`E`vY{f|mYN`^fqzU=_`7SF9{{%nwRl(O3YE!VqwNC8NmxVX6E)hSOxd-${mafmV@ zP;K57PB0S8>5t|6k{c%6PCfc4oc83#M|pgkLzq|*3Hhq0qqgHaM+1g63-?|;v?@#x zmmfQOC+w!3hO)smFUYI%mLC6=aLFR$noZlmg@tYt=l_ivj-L z(|F4CNpgYz_&qT(fD>z-^%ul{Bl)K|Va(KAF?2Ux{leMvMkZsKEFV0OlDaLrDCR_) zklo;3qs>Rg==F3dda{xWi1~xucSM|4nrdpCX|H}_{^HvE2(d~P^H2or1??wOH7Ua$ z3*Glf)jrhA>DuFYnDYMF&Iq7SfKT^-fh~SqfZ6`pKIzU>jlqouL9C7JCk)BDly&p- zcD_}_SRk~gY0&FlCd$dlneg0l6!Acysj){hRcrg7cw*b^MeoIKo=gIc>fC7#Pw&S2 zbDt_watiJr-8)I2@-wp%z0niXn!i5V``u4vfAeWT&%rLJmNwa}b{`X!hIS4sSa|Xr z1<`MNdwZvAm0lajjv*a@gsgxsw5wxuYkqqK=B{j(-Q7{7&JC-%eIN0CO>mMHY~c z1irCc#=7G^o|E}*9@T^Xdnbnz9x+ZcTUFfBqVA~@ZxVd!AAI+w|N1(p#mDDq@EA33x1=1rBsN+0IKyK>7SJ{jknq4^dDe3;j?m^3#L zSb~AE0uwJ1Nu%*(rVMNFlb`PiMvG`iZ{-fr)iQtYfv%Cs%dffNYDlnkdp7KG-fS`q z|7=j{(&|xtWo57ch%c;~?Ab5L&PbHnSmp9zob6Y|Br7O|RYsHlT@Ild4cz&n(2TNR z$3JLtu*ywsL3P^IJX81|Ewa1rHuIV|bLoS(#b;cLUQY;*(Dn9pEmO7s8oMG|tkM;u 
zm~Sjl)qKuboFUFiqM{V;0#*OY#>X-%kEeNrtI~dgd31$!lxnVC+T@N$N`*{HHSW}+ zD1&$TXuoSGu=f}$;0EW@Q_E6}kV_&D{Wf{CXT&o_>}(SGT%c-ZR=(*z(DRb%2F5{6 zm;EfxEE%=>R30Aw8iSqx@J{~%3k$2&lRcBivQ^Eh%232Aj|kLovL-W=#T5filb!AD z?eV9e8W3kmGRKT&KU$D4lGZeoRYbk@`VRZ{5+!_HVc{~flr7IisjWUzk3ilRqctX( zjO4~e+$Je0q4Kfo2ri*9B&XkxK?~HYpYrur_XLqz3DKIDlIVD~-d?t1He2L}sFkng z!&nEt|2m%^B3oW+=1t(tO&s`s)jBW#_Iem+JWW>W592xeE@*=GdwHR^Aw)Z{czYX_ zZ!49?exoui)qtSz`%1SMXMG;?Z~ON}7TIq4A&-R*4i7sT{5!AaO@im)gt0)9DQ&|_ z2cdOwLf=9vgh?-gP&4VPWiN8s*$`-1aE&rME-IaEjw-Q{n*k@oEUJfpR|8IJdXLV< z{}v0Ys;!+lF4lw>Qe-eZo+&T{3dNll1JM@25$xefJKb?*8twZce}Q{gcHlT{+UX#$|+y>FlU~ z;&%I$+HWR5Y@fhbU%sXdr<_l~%mf9!jgLE6&8QBV+nX4QA>o#Uw zVbQa+TY;5#PnQLFdS*mIwgh}!9&g%^4bewi7_{EdwPa<+I{A3W;w^J$ih5cRXJ#wY zt08KHuc8un{^Q(!m~Y|XG@Sp4lle&^PLlPYgTZp(4b{TID$L<_n!ARRN3KKc==#Qu zbbK35O3)a=>3YT1sR)o&5-;W?1;pN!n=!IM5mP-*1JhE+mqQIlwNoxTtUcU$+p5Z= z86a{v_;>+sA=LUllvwFw%*Q7u_mr|@pp_h$rEhM2P7N=+@9T1t(!is zs+I&X4{%Y3zNHWszGud9q_FB+3^Jy~pk~rOSJicaJ+SJ*{sL}X9PK$mc=Ts~e}83V zP=3xbjD5D6I26Xg#_jrkO8ZNL4G_iG6f19$=9IcEtYqYvXCALU#)O9l9sC(B85+6` zaVi^A1E}tf;$aVo-<6e;3(->cbFs3_n)$0wr*lo3*dMz*=+2SpJ?aUP{j^A#=%n7~ zJq!J67(3TL_39_~NFpkow%~f!nE_K7I75wan%VzxbQXS1{cjW>Da}w~NC;C>1V#@~ zYIMg)$q`D2fHab$k&Yo9j%I|?As{tC1OxEHeS{(`->dw1{iInO!o(s|Nl5nD)QdK_2!kZlN#3Vwuo z>36d38MYtXVlL?GLECw3$+UB~wk3tV;P`RpVaw#o%e5rHZw_>YKEwS=eI{L)e(VJq zhKMBaSAqwox?b%6o>0!Vvw59rFh4vVo@MOvRyHTY#X4wzM5#k^g7r}@OmWri9C$(x z$tYF4#X2YIlNgS25;s@qm1Alhaqmxk8$U~ely}X^LL7!cAjU?mfw{#I`yU~3={!}G z&6%2O5$F<;Lfwnd2N76rkY3IL*@w=Ap=qj`r^w88&?vmj_KTy=2A!u7gRyWpDBc;* zs@$~x5sO$^7zKbOCdg2`hu_J5h94gMO4D=Mcdru!ti80s$gB5qr(20IlmDhDU<@zc{!Adac-El)zufG!ZsBa=w{Db^7D$~1ts^{rKKX`GjR(8pHN1Zxa+S2OUa z4C}MV{T|Szxc}pAEYAIAAYxzPci-X7xI==g;oQ%uI)WM!B=IPJrTJ1-1fTN-wp zDS29Pf-Afx>(ro{?d23LYcOv&lPxon%70vQiGxu*rz&o;wuB)k3he`^+o zoNWZj1)tVmx7+}o6$fiTE$mX*mK+cU@L#=|#`T<@UVjBxSU*(d&R%*+g#~(Dt?7pP zH}*JLl1x44{6Wai3#8!`CB9(}eFuxGCmdTA?z+_YtSk-M6~eO-AV;8g7>(SB5(V*5 z;^?jO4I%{WrkixRwN)R73-gGiDlsm5jZ$ctK{rYIB*jbAIjfu^kq-$EG!(F=N3n|05jM|P%SLew^b39mJ#LhKzRcLqSFxcr_%M8f z4mKj)r9f6_gvClt$KLsM7gKF1U~9?vWdFqMQ3u4&_{l(OUA@Ubs$Jq*&e%m9XZ?IO z)0QM(dC8LN(unX8Ct}|ckMY6oIN}!8slsij-;FYayksYe$tlqS)r!t+Hz(fpHKfbq zO$5!&qT^HP7^qCwKB(?E%M(nuuaM-;wWxx7Yg;PbT0k{W&z2bF>6=7kl!k1OND7;- zu_#-#DLy!SG{}Qq!=8%+8u#PSik>`tF!h8;wlk{DI_8!TCJ>T*`bd#|xcIH}{*`wN zca4ZBA?=z}vepnJGW^+7heVR5avK(x+HyP1r{n7D5_V1WGxYC!i5>16IBRVCjp3$! 
zE%ER5q7&#cVcbrJ=JQP&{CWG(i-%{_HM#U`=8D?AZfi_9$EFAJWJ(^djiNyO`)gA| z+=qcTAmaT;znc#oJ1nD%J#i*C$GKq^*siH&h-d~7nV$l zRhYIFPBjP1)p|R~!yOu08vG<4QwsI}(~W&do@@3kn(3rcyQBS`xuw2OA49u4WQc_Zu)H{~l=Xb*!EEnWYp!C5b5;8;A^hdpu}8wt-Y=B{ zVYgrR01A16|NiM157fp_t)Aj*M0p$MSA$Cy_oO)ju}ZMYNlfP{{@s5PMa4xa&=k9R z=@PcLSKoEEdew6?z2wmuuyu6Q2DD%Q9UQsYZ@b>!m<_!7(@=mP8r#XXhWjE9(^H;s z0yy(zFRg`+QN2d}(ogw~WEn*Dt_`49Q&RrEUgdoAErsJQjC|^l=4x@zAQi@qJQ1JO zQ@k0f3M(t>M3Y%-cehX!GJ4Nw7A`qdP!2r#Hrr86p>f{4{2WVpV~V0tU+;RntE{$Bggz*D)I85~qRfce^&=L*d^) z^+bY?TJ0D?RTy2qw?)2hxT3v|<~A_oet${R&%|On7u-8S<)iZRH|#$_5s1!ib8~=8 zOWn|%9_Le+D>)cf(u7aT{haht0RbN4+ndQ@0swg{ z@==OZfj7MbrsBfZZiPIAEVdS`!gIAw+O8_Zr=r zVNHOP4tyI)=TRdLC4c*Y#QD92-fPhG-8EKm9S8Ihb@18_P`Bu5JybqTy;WSh^FcPb zcz+l(_cPD7!>aC~@A_Svx(~vYu4@uC14d#8A_W=9LtiXq?NI+Fp zm1~G&>#4VhfgMuZ(Fwr;g_GkTk)T90a#qTOAbgz|l$A>6l+LJNG-Za+M?6}QO!xcb zA+w)jxX#Q*NQxOx<^s9Mw&j=0jF*4(cxr zR!`;5{@wy0u6U_r_;B@MLMxWVI%zG(@axi&jI{JK9-IsezEzDCBP5D* zr2E}5fg9G<(M={xT(?JCh5Y&U@94MVlBK2P>%qu@uGz$iJ*RU*gK#cGFGupQ>8=%9KzR>wR{}^i_ za3WYIXd-r=_ zpZfMvgQONNC6zO|wI=l>TQmb2j#wS1>JMTVS0cs>_ z>Rk3iAn86j5cVg*rWN0J#`1xWAe3P8y#dryMKV2=-VX!9Ss%G|44@B`5|<`2VZts; z)%lqqDJP94L!fHhdpM`dA7OCdPf&!#Mx@UoU7o3CJ>2QAx&^{I(}I?~2rHp1X12~cE-rM9doXd%?u@?A#)L8lnns~ zp*1sunmVT5?gzM}Wkt#*QwsKzES!U)B;Fb1|AD7hanNogO@&7~zu%Um=cG$c6n3!w zV{Fh!R8ywlc#LKA^JiQg?OjskRk1r_PntdenajUOqHvfBBf+1!MY~b!6O;oD>7e42 z@Q@L52H5`lI|v9Oo^>U$@J|@h6%RF(K9AxM=+JA(Fi8H}dQ_@`H&g3lAtZu38P#AK zN#~cHKmBH*gl|bDYtIN|OR5aQ%>_H8Vg?2V4#%9vCKg{xN#|Vsyz(EWTg<{Va(>tk zySYFj;V6K=eDCT^?&cvEhl}L3xqsjb*Uq4WpJSnkC5~ZXg+5wq4Q)BEC@6To7pH`v zHHHEvic%2Y@FZbbni2FyDDdIViL89A6XRYuT|#BtK&mbaCOf zWWMvN;m;63QIwaA$jnF?HXpL)C}aN};F>La(QQM2Ba%qtsMawE@1;ds2&3;EMR|G` zH)Pw;mo)PG0?;&EB1)HoZ%~B5Nr!^FoH(AeC2<~2+RNmY(P(O>iY|%4aof(0V=2B$ z%$x;JcANvf8!^HKc6 z&VOzGT>o<@)cxay40v%wV?1bAso^VL~3&0*by1NA)^tHN4<^zAL zi-R%Mm09f>5g*R)*~?3_vq*x5jym8>`&6)gthM569T&GydGkUI(i{IC%i}Aq>^3buq=awKv3Sv&=TmNYqCPFnUq;7 zWXNuVEs~YJKo+=?LN}Q`5`|i!2j5kNx{eHxvqIf}4sf^x%(~E3`6}z}tMsrFnISVV zcWIZVKYpx+a)iv#pbo!y_*Nj(z&DAVM`NiOrr``mAIt%roF|EABwyPo;;gfdr7-;i zc&dF#{qO2RxhT%Zy<^#Q>2_virVl=;o=_hD?Qx5ng_onFV!8nZC$yx#y8Z9?SGW0S z9&0>dJM|kORy3O@jUI8*xGR;J%Y7$fI=X~8cx5hpYui4e1cQqQ(J6elRQT@m3dUL1 zeUGl?M0uHKJeMp9X7*sH@tcC*qC}Os^MVicZP7caZFOI~GL{0Rb894*U?OQP>x+R0 zuJ4(LzqA_CX#;0Y@(HOZ`g$yu0wu7bQm*fd2T->sL?Nr$6Ri}&nhOjbZ59Y4fTV6u zp^t`1#4Ep>s+C4ez=b^OeDE>7L1A8ABp}!(0DyNZG;m&$h??>z0!g7-^QptpX!{IT zQ{~LqSC$$SKf7>909Y6QN}Y^g`Xwzo&j8C`XvRL9ZpovOf zOMj{vXEyH}y~MgE&&xM@70IWTkvQ~a(2j2H8K&YRS+KBmq5`Bz1Dh|AxaJMVu(IFR z)G-LE*;!;(=QP(GA2$mI5a5%IxjOvo&&O^hbZwlKw!bxsZFg_4n!m?uH^=>4!52R@+xXJC^#Y;5YJdaAEcmmSx?z%9Z5O%T|g9^%|J}b zeV8-LxI$JI`^(a;c7int7Lo|x>N=yMo3=@8baSTTqpuWB#5nAxwalbOh?If&%AG~L z@bKH@!ZO&N>lTl)4D8%*ws90|?R}J|bV176%aDNMgE1i{c6rE;$tz6A_upNtc7_3( zYDLfZhP>t^dQO)2-D2oQPD88%g0}vVWsq~yba9mNR=ansoJfvwK@>L%gDxz9|8mFH z7E|oW6nq#H|H)A#7c!H=*Wxyo`^#@Gwz2M^@zjq%g)&}FnxP+lKQ%1|dX=b@U0#(u z{Wyuxo{6GIiYI*T*5RYJtpfb0;j;UfZ%W z>kuG3`Gy>7Wi7`ScKls8w2XfPysX{{PT1R+br|UfV6VyB{TE)Wg*dli_XS!@FAT4E zF{|9hw->FqBf9Tx;Ub-!vH@<{pbRkVZZH)lFHzrmxr032o;mVab-hWy`B&e4HI$$- zG%?^J58+HM-P|YEqQR5twVU-onqiz9WVtfhq(E@wkAhej1#4Z)% zqh-)$Xkx1|a)IS2^ChyU_W1!bL85Z~`U*((sYD;f!&Jx<8*P8`xLI;oC?W|Gj1Gv0 zy~IdkZ7o{v?1v=kXY4G_`wouxYoXni*D*WTg{UMBZ#-`MMaxQ*F#Vmn5=vegBPBUU zM+CcS?!NfYqIQojJ^&Q}^r}7@(ex?8t}JA{%BA}Nr=Q!C2w!h?^VV8n)+goABm)`2 zNg6j~3U5_{^%v-w4f{X&!&Iwh`4RW*>hj*6ihtBQ2pO|$*uNMuf$ulm`oM~6or5}$ zda+(Wp%k2%L2a(H6!+?6;pMuj2tr0W`BLnfFm&1xH*L|T3F06mb}-h1oPW^*-kR-# z=qd#Po{;4G#kV==`0XdCpulj)OVt-7^PH#pI17x@T1&;M?|meoU(Z99jjvEVOT5=l 
z2KHBXMyyFO%D(H|s~2G#6*l>)s4Oj>e4hibgPvN>Pw=Ft4tybbVe>lNfr@X73~S)& zPe3>hmmwm|LEU~xYW9kpu2!3q@VTQhf+*fWscQG(s>k8?Q0f+e&&{=$sNj|-SFB*8L$GJOQSo9)5V zyH7X>8ITjoHH78x!B6q=YPnvN`88ij9iHm&1?=Ys{^+9R%#hL0yzNFt@l|Z1uTMX* zbo~Wjl?e1U%44S+VSUXCg`lR=3LmjQW@)u}b~kbDOKUC8?@JxcR!|iXti2ca(SGGk zVg3`>crxdXWybe-U~M+#+7xGhy23HrrD8MdVuk*+S1bV>> zcFp!N<@sZjdV(+i;ZRL{u@FjjiDaQvfL8X*psuHqE8_`m&12&0#D+a#HSVtW% zh@qJt9l*=u!=ee-wb0H+6utOywWy;0w_j|So1(15pEIf}rGdiXJT{Y@y!_a^&{P5D z_+X-G(EFYI344jK%Rh8!1=^Qe_j**JkNW1r+`WV)Z)SRKcEbM7hcUpOTFP#pPkffS zJv{!=6dQP!h-pNC*YMAC;GEC#)65@ji8*w!@pbSQVhEWN?!+zKY*IKtkDUaf} z1<~CSt}s>7>QW{~*&(=Jx|9GOMP0So!OLk+y^x<28ieSg|ce&>G7Pk9oX))YydK`ZvYi@d4q) zHB)vh;aQb)q8B8ldntbXYVZm+2c*8s{}vNoVIK9*^BV6V9>o5e+wl-^Jaq4hgZ*<2=9chugamc#)`ZSb;(aoYHJ-THDY?21S3U$JkJ?~L1IzV`W5*x9SM3iMX* zEQ(D{&V}q(t0|g``rY}JBB+@5F-UnD6^Cb7$ zC5^!E(x6j)9whkgA!lzhm&uYc^jVyd7IA<8x521d!jP1RS`@a3 zE8GmsKu1ikt{?9{Xb#frWBVpxP{-y8%T`6IAg0G_&?5PButYd9!3HMAwt3Fjm#yD) zj(hH0WD{#wtVG1vso>G)=9!G~L2NwUyM_&gAFSA&*8<>L++_E=cklF~3*OCSIK69l zjI^0&=C#upF-b;aMFH z@u4DRBLxr<)*e%?DBrOh5#M5zHSW+qwex-%tcYe`SFV?GDiqI>zzcF zuFjxn6GMNW-rSDC@KgVW_?GG_&^mm+9Cn>@ zH_zNPo*eFQhxR%jzv}a@kkDhsi(ZzN@ccwf0 zui9d^&KdLU^2hheB|$N5!*#L!>pzwVsVB0(+TG`L1cmBDuVq%xR)7&5=(w41G@KW| z>vMWUb3gQKqqLGKd+w#R$!MDQpO`F3la+RXuR4sidB=gcMR&t^fbIF6W%V<1^{J4_ z2`j(=cbKQuz zL-*PrKj@{!M*snh=4w9N#@t^ViT1%4$&CJdB3up9bbh(9E)au@4OaQi$L38({!nx=9zR{=+Q@| zV(xny{VV3=SOE$rKc2)_9#6v_rv`)q{8e2Sf2TYKNu`g{2M^7zVc$c{YyU0K&tZQ#UbDV`1ifkspl`tfA-(~%fqm< zAi&MqA36WzW|wj_@ckkqWEyg`n~!C`cvYidFu-3`uB0SJ#-Rperx&5k!la2_=_65k zoT{4ZHC{Be_qT#yKRfY~7f8FqID*x%8GUpVTo=;4ZR_apjaa@z%Y6&OA$#v|^wDYX zsex5-Vr$2*T63%6z~S-S<2gIpF-gEh)ux@;D9mQW;7&kD^5Dz!AIby@3dOEHa2NdR zaH>=s!oFZBPBGW4QHw6>nZWJ1-6QTIVBU)lhwnoL>~w(EPz)Ap?_?RTPk>#F1v}1Q zAx@Io7Eh~3BR0>Xe_)UdV00>4FHRog1w*`Z7|9L+0-ocDBm9y_opE(_^+OcLmg^Ll zBf*AT>k;!c_)c=PU^Cf518Z1%}uhcxiB){Wk+e(SQ;=_7Fn z@hKWIMj8@xxb5Z66wH)L`HiBImnByMuJ(Z%sN;^F!=;;)o0G!j_qevsV_pmkSlS`0mcomoNAG?gif@gk5*+cVCUNSUW|xh=pfyxP>G(`{wlmsC7JdKpSO{xPs)w(8Iahml6NJg z3Gi$+T6ryawE`(pen+;%&+?X-Z{7Kk&p$cf=AM;1W4hU-4f`{UKYvnRZ!#un6?C{K zxtgcNqRhyK(y`xx;DB)c@fn~_CM3vIRw6#YOkHSCX@T0h4XR(>0n0jDpoX?K|1Eq$ zptPZa_54t(=iRpDt`7gP1>E+z@2bhfhj|9ju(0XQg}WLbgQp8xfxhjEZ0KYlJY%u_ z_k~pwim`dUM)E;?Zw;DsTFkdL-b{JKw4Cq(EPK}gRcu-d0TFY66eD5ftcFXi_KEc_ znqZW@;3JV`M&r)MJzPGUId+L~?u~-kR)QBmJB~L^ZdF}ZODHRm4|6IR*i5_{E&Qsi zn3j($9ql+l<**-X=3^`Yh(e%*XJD{;+fwZzsoKt>q;_Ns7`8>FU0Tw{5&h`7qUAh5 zIWSu->jeO%EB{&h$2MNR{8@20;FLYKbNrLFIdd&TxUadd#yL3pPi75^kx4Pud-lGW`AB;QIG;jZ8x6mXlYOm^G{{;O^cmg|xTQWL z#V{2UxU8S-n;t818f!K zF0HSaZZ?Je0t#+DTjZ2@iJ=mFsx<125HRzjz7 z+^|ztksJ2;-lNRS6MjP~@Bvve&3 zfjq?&Ir=RnJ9#!g0>FcOs@a}t>RUu2J9-Pi^!~~gl z$aIoVLeIaa&@kFGw}Do#GV6Q%!eVY^QUxD#S2&! 
zO#T+lJsA#!x#=X+RLsPQ`n4=j4TKMUD_E$oFqU9r-3`0m9%QNS`RuFNjE>LfE%DV* z`ejJ=tk;y}w!E=61#@3fyy#roqiF?YI`gO@m_sM^gTuLBwQVusB0h&9J+2HKcTUnTk6K{&3sOJs^Ftpbp_A+9+us-bDH%G~GL ztm~ue?ShVy=C6VmUIOVM+azsEPyU()@cgK5ov5j+8+Bh;9K8!H|3VFF9&(jUZnjooqB|&c)$H_m{*nxj^5bCYmrT1wT{XpBEB`D-RWyKez3X+;=k6 z;-&{T-jn7q^PT5K(EI?Ka#Yxc!I*z|zHG>utb~!U4~G&H+2||9EK05z|D9VLWHxD= zaBflb25J-&poCQ={0^>Mkw%2{uY0ScQetX&BRK9R{G_J`5tmJ!Qv5VtM92dTn(v=x zX+1zu(Pd0%JGSI(i3+7ng-Z(m{ln68au9YccS9SSKU~}qx>G&_z@}_yQ*Sk}#hglQ}gN>z?qThzd2 z#7x_A{$jm-Z}e^hzWf7jd)(~yNcgWruKRFR!e;v~8OjmQc^pl2lxteFOS`deJnp{n zV5M~5z$htoj6Wy#Nx+5r5)N5ZB>N=$2po6Hk__c7F_<&E7uTSYIO&*41?zV)32-8L z3rX^|hUF5%Uy~q51VV3G3(@M0rht-k@J;j`ny5@I2{H;4@zOs_@{=UzOl5oz1A#fs z)47OxT@CgNB>DqzPle_ia%RA38qVDA9omd^)SMBgbLKM1P0NmoiJaLC#)Wlv8Zl2KbD_NR{8-_+QKfMp)7WX(;SFk@PZz zrG2W23d%^}4r5XR2wjDRmT9NN($9a5VbHuWbo7}x(G0Lav8M)SOL~l9)z_LGK(Le4 zPWO3(&vQ?Is0*hyVJZkn08Houv4sUlQg5_Jk7M`o9HpeLoV2rJe)a72kCo#ELuY+4 z0cZH*sKa4hEa~ay>6_3qPLDT5+hkb9(*1R+QMKN=v0U3V>>O_*uN^qK%u?SrXOTN=iKSc zA$OuN;>lc)UMVL{1%=PE&MkXD9Ja|+z-SgCU?45bPc?TJu=2~_@zG&*eVoK83lt>7 z;ik#b%X06`tTHP@-nWPD%@)=#@mJV3uCf=rkqz+{E!<=21(wVbz7rS&w7qB86Qb~| zL91;PxNYhxKo< z4?{+y71vW?I64v(T8sMK5?6ehk8x%r)jdRn25D!t{^!)9s@qNSb_&XfAisb+MDxeL z-I!>?Mmgf`pV~el0&x&YZ;`w*j3+Ci+GD?r4LjDoNxJ#A+7WO-cQmH#B2*~5A|oZg zWQDQ3D@Xn6VJueW2LtaN4*D-8y^U({)w`HDX&1SF-y+@Gu>96qZ-I{K%AP-z$KLP% zRSDO7lf{{?04Q=Ul4nSge>dCOPo55yc<-! zO~*82)<(8va{t>;ejX@m?(U;o6eF+s1YP07QptlKSm^FJeR+%j1w9B}QO^LjP-Ur_ zt8AzmmU<45Ob=z3_~DEJlH%=55USZ*aHxtuxAc_?0^DbwKuTFc3)&FK$5YYW&JB1x zHo6AM@j;hjIXH5??Xp$L$oLX|xSQNkGa8!TV(Z-t`yN1RTR7PT(qpgPI&gbG`|NHc z>Nd(oqL!!SeRn4f5r)0L{^tBN^a!YQTOXigp0QJm7gJ$ZRrpY`;6GGqfM-QDB?Ftw zA#Yy_ug>x8Ndg;GBBv)l1jwGB{_eJ^b%<%NH3CyJNXg3!nC>L{WWl|(sJ^8dL@)a!T^xUR&b-?_i^TO_h7kR;5`ng0G~IVfA-HF=Yg@Kfi8+< zgJQp8_q#0STaJ-(WjvGkyo$!TxxEKvUx20(t(@GNX~IPos93kR8h4|)^$aAgp{^Qi z1N6wt207QV?oR@7ZfwEjiN}?wiNaoyMgL^TnzpLxc07x5>IS7X?}nbWXX z=0fC3A=W+@xj2R zE~Z?Ey|)WmI2$>wO)?LWmfA~>D(gPC3D&xpCZInknCPm4|Dodfn#vIN7(Tw^4%V`W z(a*k?NQMa8O>lp0;5+`uUxleSiBVv^L>Oog_Gr#mn^ZX$&6?Ya&4qtZc!dI%^D1Zm z)*I@uJi7H)yHbfE8n*9uR|5{IlMdDw452yW+}YI$a-pYP0YNWAPgIA8hX;h$?Yy)4 z8}@+1u2skupa*SOpL01{tvqGu8GkOrJp30(W~*fdi`Q1p<vyeNZsL2$a#wco^re82KjW`Ea8a+4_9v zaS6}N=C{O#QltBpMvd7)dQHFilVS1p)X*WwjG#w;JZ!;F=5uJ zB7=WRVTbkCi-lM3b>;r8b!7?sm5R9Y=x*7mpDsFO38VfHL z$OM_UYJj7>BHC1sJph$RMyq7)E{8Z+$Z9nY!F@a;fe6L79tWP$1_l|LkUre;-*oF!Ga&bW2-@G@6{UW#>eU zgiG%;+wC$9(&C0X@QArdC<)#4R;h3aNh)SG$QvI+KV8%!$i-bm-_)YwV+`^L2dSw# zIhQuf_p{K_(C%D*dlzs-r^PM&;JMSA88XJT-iHgU_uiPM4|!Q4@esvNaRzW96F7FbePY>)amtQQ|Q2I53zK znr*_O{LB#mmQy2wa0THZ&*U{Jiip#0w_rJ%jjs;Ikw#B7eqkS3J}Tzgv19+` z_sQ2%4es!Fg~(YeIQYe%-Cg}X%bd8=STKvpb~sXEnV#$vc=@p;E- zE%=uqq>3C3g_0d?R~E{>{Hw4u-qqIa$9`oO3TgB5ka!G#vO8S7e*xU?XTjIo!-cH- z`}Nt?+50`<7&?ZEnQbQPU|9a7{tJl$0mm8O!Oxv==2)OCZ_xcdcoSo>TJ`~stLu28 zCP8}fD9FVz)1ubB0ypyF$^L~@fkb*P!*9j}k}cDqNd-l+m5yG82kSYOsFhsOpz66+jV= zoY@xJ$}rG?-L8U>>T^O2+%pC#^Nlt)0QC z#a`*IQas+c^+0=8MAVE}E#AHpmsx)|CGAyG=Cbp2Q3WjR=a=ZgP?sc(<&*qJn`V2* z+-wD%m)T)!LRMd8&(2vqK+?qdD3Mk1osEN)<@5xcQxFh47#_}T+>f1Y-+#aOv2SI1 ztom4j=jG**2m=13>oBP2GzdlI|9g4az2j5gQ%g%Wc2>@kdQ3~RRXuVRBAL7v*p=)h z&~0<;mMTzm=v+*E7j?2lsUCpq%oVCYzdSjLYni(Ugplezgf2~RZ~eq<#{!nfzghci z$Qk3F%O9n3H_O)7n==0H>^INC_(@A#1b7nXNR8zRAGK8HMHjjN6sf*_B?XC4GFdg=HXRU&!c@Bvl zPG&F~y*~Hu6LTyY4*@HG+{hO|f4Cl$*>g28`#5$uxnHQ&eOj5~K6h1VebjTVxq4k2 zc5+K#(YOMeH$|IyYVVkxZGwI-kq=kRMd3RdlDE zwW`nBt8v&At5EFM!d%ty@LRz2@M(JGHGEXrXaljq;l$@{D_>9gQsc;*K+Htw_?W6M zaeVw>`H``4_`?R+%dxQQ+{t9|YdA@>mdzl|AmkPr3VtUnaNu*Up%t9K2ocFrqlQCA z0oyw?j~PAPfKIe{b6{pa95Fgeqw4#`s!XSzlWwe)R)B4k9}2s zfMWz{heSB=%uF9d-{kSy^DMT9bPd 
z+BsUMD=&8}>id@9J1alJnQO~8HU_8a_uuS>{S8<$4(;x7a$UbXMCXUNt;2pn?9nXxzW(-Upht z{{KFBgz;J0^8~gE)|Z2YCbKvm-K})K_`8tdbd9G;`jFT;NZZ}!}Y&E|Ex$+VP-ED7o;7nYCA?;{~lV0oy|;k*Er$2 z)-EKObnCL&s$#&2joLGU^VK8vD(lqbabvjvx)!_WF7Xgma~k@@`ae32^@40sO51-yy zndyRV_ZK-k&ICeYytIHX_HxRRfl$T5n{MzmAA?PsTx=7CK8d6H5aa;~0SfHrk>1hNEJ9dAMbtep3D zc%xo`Gpp2km%8%pnUkT(uP&#$bxuo*R`({bw@y!1Z$>7j6?|}=u)62mwQn(v-)bIG zDnSoc=w7$b6DHUr4lm>H+v1;h{jy(nllcDA-V0!UXwO?)SP%bq8~ zUJ)_(MUnNxGiV9bGscQ^9mqi(8 z37{`AO`JyUsihO03meWq-4uBYDLp0q3`6gqebrkja`hD#g-?FRa?noB z%iECKMYV>0ttB9kaQP=1*6zp7aHuJ?+9iGQ`A*pbB`s$yr8?|RdoG99yf2UszO>Rd zY?%m_$Oob8Q{ixH6LC5-k;HG7j5~LC9zyr49YqR?eK8D4jSV}DJ}M3bMhcuwY2uYp zX$0U`M&Q?2ECIsBztZ(pJ%!H;o=Z;}Pz+#X4wNqNTWZ153uV{|(xC)~3gNI7=iHq4 zSZs!wGUYNY&a2Kj54(FisgZAEFql#C-_eY&hq2~?RKEDKUwYej0JSo7HY#~rNiJru zLgw)kgfV}?BSo#}9m_i0cN7+nuKK=v&FZvoOyCj@(`72*bw7m7MMx$T5n_HULmvZL z*yeWXPG-AGyIS2C)TkI&!=QQpDlRGbxSR;|6g6zL-B?^w4QWKoMPS5H0t`mo`u!OY z0*ucyJp>g9w47B|$m!R1MGeh6*kFUyD}O49W&_BHW(qfnS;`p{qDnyhnSm$%a1Y-e z?B}`Tt+x_v%oj%Lh)VRW>omd>cVr;n=u{%alJ5_f<)Y#*hG#J8qTEzOHvDwe+J>32 z8wY`Voy)HChlBs)=sf({`rkerimF<%sn*uoVy~Jpsy4AzjmF-!XHjDB5kl1#yH-(D z?af#1T_b814Yi;1d;Wu*yw3TI`@XO1edpNlG5!49a*yEhW=P8E4Zn2p>v2T!t>Zg2 z%daDIMxPxD?zcA5>}YM#gYSJhem4#8MUEZc?K1`4Z8wl#UOAeyr2V!I@D6^N*Thva z;|Q?NkqX(lr3?f0AjhS{@1q=5ZFPY1@$zq)KwHgqS=X)f$1jstj#4f-BE26#K zPU-JCrTVit5B4JOL(726XVHg6(b(ggBc}XS$!yKahXVP|Epw`IeL-S2a4S>(IW?d9 zE?=LMOXu^7CZFkQGSYL?q%zgGSM9sABN4a1PvI-g)1{8lRwEQo6%TJ7ehPNgsp25@ zSayXcTyJEpzhvYuwUGOEJ{-|pakp-HSJ~%0ERS&a_YSG+LbQ$GBOr@UO@ydMS&N~B zO=ma~{pHpuulTpskn(UHBcP9>Fwg7=3)~@m(eG!atH(hF#>DE5YMp39Bb&c|-FJEM z&*<laGZP$G7T51XMD`@I4w@$l&{|QLVx&{SclQi*ZP&-k_68WVs)UI zDV;~X8iT+E(U-|{h&x~C5fSLb!T$2~JH`(|9;8wPeWc<9IRZK-PZ}6kX)7;4w|e~u zH|Kd`;MDDJ-;pbt9Svg;i*wj!-1|zygerY_kyM)>d^hWK(fRKeC+UcTnY7!3Xe?ak zV_VH@Xq%UFLg;wg53^HuNG-Y2swoELwub`XVqo$ei7Tqvu;L1X(dV~tmk{~w$HU|m z6A8JA(4cTU!r1VVptW|82wS#c^Pxo&)ESfYZfVRQ~{SW5h0uVflvM_Sil$lr2*ycqEF=+J5ne}SN z6Ykd5QAS0*58uoO2s?#9<}I$%Wb8st8w;1pE?>=D#rF-&P#RC=^$gvt7UeYr39={F zq|?frN82-M`l zhij$rHolTqlG%(N14h*+qFQE6BWztL6qPt+HR?cw(hwkBtB$tHd)8DD*uT;HLIyA8 z-Z$*}v>#q~l7AsmkcwRIaLa-38UnHE(FldI>Yz~z3;IN)d~iQ#r2+QqeL+l)9XX+j&uDu7)E{4zMs>}Qfmx$X@azA zoV?e91!l5@;6qpM1%K8>3-#8af5L9`y0H*-?kJ2Yuj??vIiAWUA&E>_zo!sMx{I%2=E{Y%BS`s11>Y>bDWY;G?)VMbrh{Wi)2 zCIPSu!?NmvQiEg04mh9?;zPR`~>Hu{=dKr4^1F6_+R-7~=71HkXaZZk7Jb^J|j z`Sbkwz(W@c?Ioan1RSblrDRNem#P-%(vY?M6Aap_fzxquxt5oGb9Y(y3u{wVC=W3+ z@`eApD8S*2M@*xfnp!rSE8f0VA}j6p|IMv_Gk0rTs{!ndoM)dxZl;0G>rh8l=Ez2O zL{@&D#RKb^bI4ob3@(nYu%PFmOQ^S(1QUH??TF$UPQ`%oB}ewStUnnH^6oF*4_)!oh=N8?^L#odJY>qhbq~*c{z>9f z$Hl*<2g!d7BdPG+%{%Eg9(?{>l~kxWM5uyOw6t-iUezjXMtm%{R!jEE`TD=gxRCUX z0DR__rw`2@Lyb>=dM0kyy594-3jL_!;xH0Nz~`f=1Y*i-oRa_RdciGI+s9n!Jmkl! zhi@QSLa+P4QCUE}caO7k8K0r7lyG?C~X=03* znh}7i0Id!~#Cj1W;|R&)6?_MkZx1#CfCysvBU;Zd-M~~#74NOyIPx){Vwe}dR<;_= zkUW77#f+!P7u@9fS#&uBa6q>H?KG*r?z4{i?r;zdYre(!i_ThX@ z^ug1=PmB+`Kiy{do}VA~OvqOAj6j^lDobnhZSlyN@zdy|>2h_zua>l6NE=&HZ_x^X zFrIFtZs4XcVHzlhrVTmAQ|Q$M?FW%KexLu*sHoI0ITRKJiXj%z!w0Ju&7)?;_cApplpt2)eFcP=@LZGwP! 
zSS~C7pB)$d?Q}Dy(dEh`dBBo0_X9v!%7pj-=XhyRI(9Uh*_rAYNJA$;#12IsiM!;& zbly&rCDcZbF;xGKdP7xRtC_iAeY{chyteahXKwYfUmLLKG%UPX8x-aKNW_+Zb1HXD5|2y6LL03mW@nFXNJ?4K-uwf)Hre@*Rp|By_ee>t9brU>J zpu;;T_%i->YhhtJuol>^d1?cWos7EvTzv|;triD7Kok7=F%{(!=9>H9!52Jl; zq7EfiC1sDqod6KV@!Y#^+Wx=G^JuZV%br&)S>4fTp0+ z-TytmL}F*ffCoyeENg_MHM2@IDi&6ZV286Vx?ZEvL;uKGRr7voj(P?Mlc-3l8Ysgy zpv)PSI=#+(i@&S_LW9__a-lHyIvHiGypbs4I%sU!Z{h4rDTF_Dq^&FImzV z;3_e3aayU_wm;E5xTw(1Y&Em5#?Whqq$$}HL0l+mai|h zMG}IitRXxez-s^UIAHj1RSTE+k`+5^5m8|jqI?TK1y}vislM`;etI_&I>D`!0|~Uf zAw*duO7W|cwa=124*xj|fves6<`1uSmmG3faXMWmR+U$n2SiS_ z|9~&XqHpg%I(^QhFRmOqi^`(LHa;)ctNS0N0@{~&1qH`rx-eM#;W`@b-+*@FA@2F! z7S>kNatmMLTUrolh>0)mbRC*mEjxO$=C|F=BXF~Id%>A8W}`FH_r7_Blm!sVRbAA$ z0(;2At*Sm(X|3l8YS|j4!D|9EapXT%gX&3&xP_dCWssxiWJzv@d5PWE{f46DU}arf zE5(BUAYTf@xij5HU-^6uQ4es6hgb8l_@2UoK9-ZK4tM}Wt*M!H@$*Ax-zMFJZlJ`= z)i3o?UIYl6+7*p0Nh3%D+B1uETAa8`)}g@zlf|i zAwD5Ap6>bm^-jTQUb3>S4fI#Z`hYJLX0*!0cP=m?06+T(X+~-dwRyKZMOw@Ro&1@~ zvXniAoy|&Hn(=4lj_vI3stJ=Vu_Ul;i9~`4gDtowZH3PI0Wf{I9rC_l*Zkd6%^V-u zPqh3KHMaswPQ8%6csx*Id=rhariHr?GwvdXV7aGx1$eMmkmuN#xq%hydkSJ{v6| z^unm&-R{;=8I#=J`?}fUDzazG(y}sF9cOE*e+r6mA6^gqAaWlwX|t;04!jtDi~$JX z+rCLVooS65>*GHMzi^YQIEGp)DmT*twe#O3y!vi$D{qHhd`vQZdH3EU(v^`8_aEWi zYJ~XVR-MW6JNTcm)!#$cOfOa9&WZvtSs1`|f0RkPSo=m|#Yo?!dCI6-6dTZiY)tsF z#i-jP5tDe!{3F*D-(Nbn88`#S%j6x3AKEFccOLJzNlLxE{I$^#@Wlv*ZD^=$vlyib z%#|Ii-sz;`aO0!jjEBK8oQcEBtjXT-nmT2$DfrA*$ZsbQkt!YYe3_-e+?z3wAMTXb zE*Jg@0(U6s#W_)C-s6VVOF#dDp;LXu5)?9-NpRw`Uum1%FgWo~sXh0;^1chrit_^3 zvGu;*9xoLbna3@>39=5lll66L`YJ|4q-)Y&$Tcl^bKr7wz~nTv&G?wLFZI!sRMBwb zSpv_9YonU+a`uOQk6zUCrsqXUbJ7u-GqyIH$qFCiEqUQg!JSo{3xPbTx*UBFyOkv` zZb{W@r*+0^2;yV;&^QA~XC2$-mQ%_rE|xp**1}4`WAAykgx&|!$c)(03iymEC zeR;U#Bi3+=7ZBkb>*xKT?>EA1%i(VatDoJ7^(iC^zlF>#D{J!{<;a3J(ls3r(4P#j za@52RW(|U5pqLBAKWG~kSB{bf+$4?`;*>^B zNc^D!ZZx*OtIxkodr$lR zTXR401P%)Frb3j~u|)`t=bGb?sBSua!Pfe=8U)^03QE0*->!SU{$73M_>o-=1C-O-DeS!A)OaYZp~=>Zsh`#SUh6E@m@KcmI?k z>SQH=@G^0`vzPmoXa{O5p(%n78C32<>@DNFBfpY=0ipI5B``hZC z=GB>!CFv%EFLVs@#uzGp1Hc|_*4(Xx7W^onY1xc1VN;@d5$yq87foFT)w;q*V?N?D*Vny`Uu<--uGtgYS^oz9 zel}7W!kC#$&EU9`c2@b??)S*^r29V)<;?l#TwtM_Pu9x!`)AI7o&Mf9xjQ-8z}q=o zz1h3E`4x~yrT44v?iuh)YiJw%w>5}f`bo(yOjLZ*Dsyn=NSf8tDEU25G%c<63^CaT z+=1`9RV}UiEapWTK6+>xLY>%475%wcqLeYFbHFBI&~PD2mqsbva8YB7!+^_vppQq6 zx&69Q3WaS$71(KaRuTy!MTWg^n>G~_3);-XJ@o!Pw9V{4$}{$R6h_hjw(DgLLiEXc)69eVkRDkRP46@ zDaf-&-@H6pKJKG1w$V_JwNGHle43x`wKIxV#4VI>PqBoC;iM@lXEGAm?ptgJF%{CC zc2>k&B;1^oXTUJ+)m)HZKkZ&(;~C((;^%pU*8X)_r7sC?J34`nMeIL zc%8?$S@hvkiQ;*%HdIn|KcQNOvM!c_I~;WExj=;%o6;18io_ulr7QsUTVdnkFCIpH zj1+~|Db{Q>XZ~=G?^O5-aZkIGA2b-kGAN#u{OUIAcZF$inG~dUo1iu5$+BD7@0XiR zBFjN)h<&v8EZ%x(ZwZ z`SQC+fk1?dkH@a9hUX5k{ggFD*{O07Qdn<;xTU)|x7*C7B1!e`m)M78P!mfE^_rMi zcgy++vZpjP*El)({yjxCKwjX&Ht`yRN?ruRn*2PnUR65e04&#k=_A2<1~(r?CY$*R zXl1$3fBZkbGCk(0nh9LTps|1hiVe635^D*9P+e_J51;aZF`6` zQp0tMQp3i5mUSw-@FWrS^vArx;pxpchdm00*?+TAwG`G}#4M2^m#ZAY5J+8;U#_1os3kOsQ_zCPr8LkC;+O@ATv$J3 zhtUg2qkqelbZAsYNgz7wfBcj_Jg>ayfV*nm!;Zg$%{|>F7b#pu?m6co0ZgL+eaf-I z`P;0?p<#cp|9*9gr#j?fy07q5^KuBVEj7-vycI@jm$aJHEOqoO{8Gc&3s`h}Xj6CG zzIMB^)_U>dHhX&GZgsWWS_&Bi>@6Haho^7{^LLJG-YXr`5dhkfwt2lGmTBHNGnT+= zIR0wbdvmA3fo@<}!6Fd!XO(R8{!^=*e>3l^;|KL_e>RAH{gl2T1_vdb6sd`n$#q(o z!*)4y1$|y~A~l4j6?t5WVYv>~T_68bci~L);S;WC+WnYQtQN-FuZUC5g&Cz=a~}}? 
zdVLXW`Y%`@tvGp9er=XN!|Xm*VoQW%`41Mme|WA1 z%Zi&h`MUO$bY!LvLQ-W=OF$Bw~i(l(3d4_w!HGifV!uMVuWN^0C#tqbMD zI8$H$;k6#veAew7$07KX^qP|#Em6$+PUfhu+~j)wtD0vgmohZcX&pJ(qN@bLFMp`o zwl*=j{z0$$Q>5b$!;0uE?#u3ndNlIgox#5h8uVWmp)+~O;ty`;jOcYZo*ynH#Sr%H zW-qi(dmxccUhBVOIN5%sYR#0xs$Jj&rs@@Ko7^q4=Fi4&we)K6MeTu%dOlVp;ZVI- zib3osbrC7csAK}yx3<-9*MFy~=s&!+T2mPUQKd=J(#qX7Sibbfq_|!011g2K=IODO z&s36TP4DuHsZ_?#b_p99=6UGgj>?Y;vRO^X8!nvj2YNb5@kX65j6bCRgj#Npvb^H> z{HlW}EQ~bPRK4h@f9w=hY&IP;%hlJl!!j^JP$ld2qp32<>&YFN2J4Ce-CugKULwSs zg@E8KzjVgaTjp6ul4+yU)uzY90eH-Ve8``lMZ`-AV8kReT=rw=9;uH*@Kp3*tf~P(bSuZLv>WGL2XD{0`!jhIQS0^*lF|X#E)3>0jMIKySP@?voXhj*L$CY zB|Uwq6ufcO({T}xb@J>8%=5|}lsJv9@fdaYr$oLpWn?@ zJoGHivcH*&b-Zw7%CK&?CrHMeul>`_PLd-i*$!A83R}w`RI zMYN~WuR65_nWA5Q-oJhW0@@mtgqQ9T{~E-aH9x|>d2LE8auLQL=nm8A*#o|WHfp9# zi)aNxdAQ;nXymrHV!X6U_maQRFfr00O=!Slp4vHEs1!boxq%{~Y<}bZ^m}fegLV~Y zsmKhcJ`Dd09b2Dk*P2c8ZOB?noK&+RGTy7^Kd8kVSczLUL(#xd7>b{Udc1l)23LAx zREd2druADwaZbR7&@vSJ(e7s8{tubWT1RJyceAOl1qStc^Xq~$%B$XC7Gp=ZJc60X zx(!cRY#uwy70KvyYFw8{u%INn=A(Sm<=^xs-=eG2jPJ^ZPlC)UbXgM9rra*Qnxcfy zQtY8MC{IeQQ5DI#g94L~6jRX}$JRYRM%iVRw(jSo+6_SM$J=jRH(x%DUfB-t@bKug zS0uEOU=$EwG8sBc_ZkOmL{s;F1P?o}_Rfy))_cy*Lb_VpO{-G;>y7KY2CSHHn`-13 z#WepUb{C-9(BD)Cm{HW$Ex#h3!X>+cthe5nyM^2?V0_y*N5qT#?HS`8_~qNJ7)wQ{ zy~Y*(Bgf4qpXo&Dy?a_>3ot+D^@pIbhn(RxPUqJt^Djeg?0xCH@@Yr3L^DyI`PT7- z>Vw_>eBo7*G!c|TdPKrG>FdTY`d4G$@tl?($`7f$)r{`}FlXO9qaK-eZWmm7(7`G{ z?&u6_$n4bllarqhyp5l;d|lh*G1x?QvrCxr1#wGF*bvto{e>GA(Sz%K_|ir4Qm;Q?Mx;&I4-ta21vpbVTq&@7LG98W}?$Y zA1=0*#^5oRni7TOfAZ}Gkb|Unx|c%Mgd2V`BAQjfu=32;6nq7WgcfTOh34?%;2Gss z^6uiTALaQDQ|_SGn<3zGDhEQEL_!ei>_F#TsPB^>Y9gwJo-|J|5~Mv6TGv%W)NF}; zjiV0vnC6rEljz%%U(UZDrDr}qyMMZ$us$!Ui|Y&Q073{FJ4-crBb`o;)%va*4b ze7R|=p1P#n1HMuqO!@X%*lVty5+H$GhUBAVEj8}+5uYLWLS{N8faoAgN4cT zrQ8(xVfeh9ZN_{f5>Id&?2ZvHL7-{t`gy>@yzP0r8;&UTuX&C|2h3FIYY~9*W7IS> z|K1dlc|3EZOJ>oSrb4YOe>+{cy-f=FvUC`p0MVHiK?5y1n+G5+j%t_8>izco;1|}C z7}qJb_ViSGzvBomm-Kz?6CBiY8_2_UOJOAb7mQmNuwA^hv zc@E)-a?fglSIFh*yXhuuOrR(B-__Wdh$dPdp`Y_@bF;4NtAtx2Ez4v$p$l>4%wCz? 
z^`&Qr;z86#@~J_zobaF33S8ljyN(}%nrh!`nk}hJ#y=uY0MDqGz_nU#9cWPM3Iqq& zVhPh@x#Ao+l%vs*8D{3NuDjpb3q5yrmu`3a+BI&8_h2b@$x zwhp5YwSYav$cT%MT>U_%liw7&5Nabk;|+RRlP94dJe`>DGa@oNA`tRu+k)~i`zc{5 zYi1m8`QF{Z482KWd3tjC*IXs+?1xIuPb?D~VnBeEt4S}`5n%F^3PIG2)!=H_jDAd# z9uWlB;69bX-Xe7P)hFfAgE8*lDl`k8@GsL}PURQE3b(o(+x}X;>!?bw!*etpmgTa^ zH8c@)nCq~z$$O*XMLUC70-gylF$xKbm6&k$qTDSNZMCUbx1 z7Gg&(NStNl7JRi}v}w*h#;U%>{aHgpgZikp`!k_E!P^T3-!R+qL`jZH2P?gw>YS{s z?0C#V`0^^uC2w)@;tNgSQ$I8`dgXt=P;P>U`47mem&(J_C)%}2JM1A~;ap5>=D4BN z6L3KVnzb%RaPH^iaOLxvLerynjpL&Tg(kpq;*uf2U17^1Vz9aX(&n$sD?@z|2gN}L zA+AVyyn!B_BIpbe?)7&s1{kvhCn)6?%80s>l{5V?#Fq0td#fO&;D;DC14^iu{h6_0 z(qd)Y-B{e5$|Qkc6zS6!%=%9jY>jM%5ZhHMulce>`scsqI>Kl~XN>B=|JpSk?-y&rMyxcP&8(7@)X%VF2yEtu?jX(y1Bh{fc6@zz~7oDRq z2$Zj$=eTgH89(jBYRJ_{ZD-YH9Rji8*}8muQES;5@b@3l)7910^WgoV2j8(P)o!`B zR&S5PANy(ORv`~_-`$oO^=frC**H%%VEGf=5++UO!JQAXRL`PxeJ}#*w zI8`h6DZh$#B40+!CI{0R_d8X2>+)V$iR)O(PPjbDkzl#$bFSg5EZz*+u(5Nu^^XN>~VLJ24)J z83g9j2>EGAKJ!^eh9jc5Q=MWID3@6@SqvK=(JkoST#ab2`S0hgU6X|#2LwK~xKX`7 z2|aBOy{|K`3c2*{T6Xq9CyUCpwJAn--TcFJU!Q!UAyVEB^(mJ}@SK|~V<7R0ddWm%sebx`{J|umV zNBOgGP!^W}V(l|6LSd~y`0J9w=GQADebq5!+lc%VaeMxm0C$BZ z{B1hJntHi&Jw+Rl6I=35uLi>Nprh!UI$`ZUFv^eo^T0z_9xsEvW{pHc65|NzTFckY)CVM#w$#GE+gt-10l9ma0NFr?oX_~wWx#Ep z-6u;6nXBERB{M+%~L|e{M9d+-1FvZD*&`HoA3VSz}N z0lB-Az9QAyRdYAr_CD)+Q~sh_k5UR=IZNbTIlXV`5~FWB zi~Sj*cvHbnTeu$pgeBh5el-@FfGd`y&;3iHz>7PTeC4IRuGJQ0l$BoSu~L!BO0i9R zNqV|^$#%9$ucEzq`M4?uR~pXjRpzLuB*etPEBYttFl0nk1TzGWg9wE`0*+IPZ+w88 zjpa9gY=D0!1L;6lz{wnNa0Cj>+OMXwxee~lx~@x-jury7IfhOo__Id(tgQ&-w>p>x zcuQG@IF0#REqQ|o8E<69o6bn(XV@d_jCs*&WyfYS5X`qF7f^x24~cQ#Xcv4?Cz7pkwc zA3qEUZC|3i>=z%fk|kDU#h@t1`qGKM22|l$!(lce8KZ+y@_3RH*znoxcgH~#F{`33 z8VjD(KVkz^OQ>MXI*MM21*SgpG7u@KvL1)h15r^tZ#=cFD>o#f}+Dr7G1#R33n?63) zr##6fA?mi(^l87{$g;fItG)Nxof8bXT*lv}bQfp(zd?uy{e|t!5MU3X*9dRIrKKgwv1yTbu8FfCZ;#73xw}5OYufuC1CxAL8-i%ZzaMZa zgI;OGAwUd2zE}p`oy^Hy?VkKEH}K!Ko#hc}bmf2>_0y&1|H+4Q#ocJqO&@Y`xQEmm zr+d_lSV#2A;;y6Axk`RW09n4aw4#+U5JAV6Q8Z8k9!EtY(_w@6jH*IUqI*2LUi%ME z6KlA1!FM<%bXY_tUa%%29)YOUv1xhbCDW*_uRGn6=)RrjZ2Gbl721!})M+XpL4)z5 z&%(Z4h`#!S41TYP;IE6J85@k;LVQS2vn1MVgA z6iD#A=@x{?Bw6gI1GvU@jwLN>6cXW_eW2tBAj*djJmWHRr2kmkD${#gftgjMylr(+ z5HS`XMfjn76uC_t;!K9D$2eBd<}K~~jPdZ|Lya&zH8qesK&#Z@e)c3= znPHa`W;>Oo5U${Sz!G)ajp)Bv_4k#1CVRVwb(LdO_4}JCmw0*L=^xmB@nLoZdsZus z;mOj1j&tp9E%Cf1`2$67kuc+u`z3kHKZ{Zt8fww3XU8>uA~MCT=z!=wpn6Q*2wcrc zh`WU)KzQ~MrMQw2G)OxT{=;>~um`rrVs%ftrL+62dxqw9g5RSh@gBmN|AQJCe22hy z+w9?Iy!+mvGPuHWZKnRmz!+6+O^qW>{Ml$2r*Q-#mvi)Nx!-;MRZ4L;81OL}4kfbS zX-N+3!mvJFPe}ty3k*RAe)D5z{x6vVkE3L8?3en;S5S{3f@ugCeVV=4(7;K#us2vn zag$m5`umqQA; z^Gp3GAXHBO_(9`~RGG4Kj$S+%`*YDOOY1cf0j{5=W-CX-b@7!v(Xco*SwTH=UToD3 z-`z1)W4Iz(eQoecTlEDi`Xx!f@uf06A45NdD$;w#K)hc(1Wn&zu!w`%cuR>I;tm7$ z-?CB50R&UXzl~4#2X?GqOsn5~ySeJRZ|}OuBa@@h|NGs!{?R)PkVx6z(;ZD+=JV1v zduhLmTL(HC&F8E@$yfvciRKmD@7!eo>D=07-rB;EugS3V#40|h!rYY4j-9tO7MfT@ z)^!}%V^*ftzC;E(B$la^M}i0iXnm_Mm|WePSmGIGHat3)7$drBQNnM;_Pa0DalwCsxO{?0iUVp zF-b$^z&7_NL~7d%tjot-`y=fkk9$YLSk>_UUOP?{zaPdt{<8xErXt7yTpJ&N|Q|1Idefq>VL34P>GxfWf3SSpQH*m5& zG8cMP7J9XZ9a}L}imNa-1TNd$97}zD)(qIIs~V~UT1-!RzsK@Ha}>xCuT;{TCBU;M z9f61^tg}Yqg&8V$F)o#E6`{G`Zod3wa0~&BavSVCJbF~HO5tcvOrWMVoM@~o|C`E> z!;uIVVQS}LATNaeIIH6evem3}zG@3$pqG@=uDSB|k_i@=?D$97RGJZ(bZ^EaR(jC11TRgcDlLZdV|J4*o3O4T zd)=m$_%r=Ek?z)t;?9Ibwf~lzr-~(TfA->ar)|e*{3kP;9Nmvcz2G$s6%!u&!1mP` z9{D4gt9Me2MVmsVj{xe7N~OZS>YF4^UXU&`>~}cFx7l2H2<0!~rFqQmIXg7vw-+2G zj^yrd&g3qylKS5Jce;~iR`#7^4*{3_#gN@6-^zFVXu?ei;dPPt_zpb+y+hcVxhmCY@)r_W?`D$|WneHmkB?ysoEV>Bw&bi0@GHnf z=kSi%aFvkCV@EBMnDs`(7A{juT%?YJ>=#Yr^*6d0tjlam9TBl=gG2^Rf^gC9r-R=w 
z1+ydrI2%hJiDqxWy&6K^vt-I$|JsZjz$~1e2-fh^E^VHFeIh?E>5H{fDuV`IbU57_#4!c?*M}ZnjMPsL z2HI~==uCtD?xxr*9sqI22Xnx(1!vYP^#EK!|Hx$aWH74(aSdlRE|%XePj|3Jp%>zE z!FQ)o*|pC;31n8@N~|oqG-MO{xcHr)x9ja<C^5T# zDf!d3#q$I|#c`bPfkvtu5Zd$S4hY1H&zsC?>4jlh8u~dn2Ff^8Gn4ev>{cqwb)ARv zGkM`m#?PyXA;#N2Fi(gFzl3Vyq!u z)g(1$GY1N?m5)HTC=0)37dGnZldP%I=i;D!N}2{1_L>$Ub;808uUk^TJ==XK z?1g2)!y_=j!==#yokj-4p5p%Go>Y;)JFOJ&)~8+gen_TBxR1SZM~OVpRp7*Z)xaM} zrs0*dkWx{R$v{g4vR8Yn7e2**ExWp{ihSr6rPeYvl!XN^-+ z%pcQe9G%{Mx+BEaI^(FTKK_o=cm}~bVV`f=e)=i+@X*Wk+a{SPL=VK?TGpYzpoD>g%F z;pgOKjm6K37gx#ETOv1+}bfthE%vbWgrt^~QSJkzh z*;9`{8w$4eeRKZhQ7`hAaaSLFH*ba$Mvmh{DZwKP#n~TlBnQySFyWj``kBeJY@v0G zW$-pi`q`mT*HxdY+|9U%J)j1q zjl(XS(F(**64OsMw70jmN=W;Dc+G|PV{~6R$=+qD^ZqVy?XvqK<@eQjX>8`phuVxb z#4J?P24BhzRd$EAi#EqZW8P~T)U5~#W?-#N{tN1Sld<`jZ@$-cL3C51EWIGv&00PO zgS22~VvKG-EIrQ3fIz_K5Iaf6sM0tD;lbX&%GPu}Pt88at9OQA=-D_eugcdqwIrXN ztcSev2OO0_o=MN-o|Z!50+?btaJiE&R;idY{(zYE94qJj7)Og4`VG*fsbBYaby6T> zuVB=?{V(91F>5Fda%6`wnv#H3Dm>u8JrO?vr;H2xnXPwM2K`|{;)jQfjQ{m7Y%VSE zn%`cmtvwST>D!yyro{Y^+5KWR(G%q5WtSZDRs+zGKmQbTe!g_%xu;el7 z0UVrLrqN4OT6%`sL5W|l93CD5mGh2^=n3(1cfiS3c}W|3QWn~5TD=;8SyAq?_OY1z zFcr}Ht-Up7YR;5Km>JIg2O=@yz#mDptM-(Y4F@dPlg$a|oEWKFJU$+uFpBGA?I(;H z+5QCtKr8oh`mA0jh2D-}7yi?u0RP=Z@vv@6;C}aPZKM@n4NL=Ce?9|B$kU<$S`4jS zfe^{rhG5S3!sEp2Jml@4hihGJZC6X5?r&yt=L&S*jm9a{=KQ-EOTevX(@~M4?#(?Cj{NoU$xJ0%<5h z36>cAWDN7d8LYBpiS6d$N)IocI+J;aX9KmZ>eRcPRT&Y@n(w?ds^oFWkjZ_t`zmP& zgeX85Ocy6mg5N5qTRO~UKy-{dH~(HbOK^qgikgL8@eCTql#(xHTftT$)l5B=_bQMWiv zl%IKYm@_uFuNTp%@3N(=nbU%!^x4}0RJ2~B!?z3DoGFR^ z-C&K1#+AZS&!mBBX3>?x7~;&1#|S}9$_C%@1TP|>WY%ARj*@50vlBIhMOZT@KAqc5 zja|)lc=bxe$Hj#+PV<4N&?^!gM77>GS&5V&`{zqr4;Ub`J%+41HdIK#$r zEw8vxvqbp-YxcfRG#jYcfp|Rh_zA49H!0<_d&{?)Wz> zNxs*d9<_;O`4cQxtMkpKK-%F$$~ zvmpv$E!E?ffe#~HTIdE+fVW%Nr9Fz#+v4^6Pu+35D?=3;;jG*_$~&DdUbthAbixpW z3HsKUG6(*%;@;$h^cT27;1AC^BPK$ymkZl{Avu=uzH{O&Xu%2kgl`vTf>G-R*qf=# z%>iw<<7QExP>Jy^U@Syrl6A})`3rbVOQ1?z6o<9)-Iuuve{7ImQ}f5+x$BovjkmUp0yFdly2TLhFtiDTn8-(Zi*xcSaxv(dG~#lnCr18 z%E8CCWpDou*-doxc+dUC$$TRlbiP{~a&^{YHCW;7x{>GG?z#|sdMOG%xCjIzOP76K z3ev3uNsB;nZ%z9wX#TjvH)F?=@7ja0y+)g>B;6758Q3V;AG2l1FkM79n{9JF(J>_H zX7S*nz|E@aT@TJa+ro{AMSmp6_}7d*JfPS7NxZL1`j|Z23jCz(rxk%h^+UsK3rY#SN4#%Y&qR{! 
z>-T}&nlTSXQ=MaVNpBI~!1DZ>)J^+->Jt;JCHW>e%) zKx@e54pu~S^*W>;{puw|r6KKA#)2?an11j_uNs0FDecq`D>2RHwONvOuY{#&mE1?q zWXy&%<5}r7W-DPC6M|=#my*Isn-YwqiGEW}!P5lCi{wCN`a^g$g7kN~Azz>M9BEG- zu}GqO^Bb`g0Rcn%7z69+M$IHOFYYr`8COL{4artTg66a`E$+krqv*Wjss8^cevKrF zYmdxpMcMP_+LYwl*Or;RW$zI}M7MBVl6B8ZzcmpXq zU0m!nNNza@3?^qJuhA>l)wxH>n=lR~!Pp)xp4D%mgco-L^+vAy-ew<6w2|1n5!pH4 zb$pbPo0~gctkx3|wWu&!P_VUh8VNKvz>mi-3u5!0*QwOZD$ zhj(|yTx>768S>a&%okr2ydC0=J@#(34&!WBr@qBd{gxQ8B=?g37HBHE;c-R)T4Y9M zycE-1!=v`6{GFrtG&$&ji8ce2haR6T#t)H^hA_y(*$9IH?CT@UcB!uBdW_Ephjw-+ zu+fBLkeoqF8Eg${vVl?{!QfL;v?47O>Y_qnk`Za64SWyRUepLB=4&YlWwDwnjc6EB zE1+~mLEu8Rd#NB{N^7$q{Gi!Q3pzsdeXh5o2H1*Y|KX>yRHJ~U*`);#=5O)i0C7Iti~2{V>-Y*W=erk@^n2y{;}D>K&M@X6 zajxOXq7PPQ<)6sD?JA1TR^q;)cn2Rm@O5}gAlz|fD2iqf>UpNY5 zV<}m%z#wO;#xNXE8e5zN_$~Cv?rjg#O1~3OFybYDpU=s0{~>fLnkJ4ySA_j7!{^)7 zdMpYy5;Vflo5t0lsNrh!mV|DMPR89l^LBQG=`~roXY^rG772N;cr$;-jT!V;h|V~{ zN!4SO(|@d0O*B4QpSqanF*s3DFj0_`Lu&3^C^AfheR(wj22ZX;FUx~K&Cv$|&9{Da z9kN%;L;x0A1uw|>YwP$Scids&Vff|MK&%)HvTG6-J z$>ht;*zJlkJ>7MaUbW611Lc_zSTWySf zf(!73X4T9DHvK{=^iL?cP#ofM2bT7+Qkg*@nqD7w;4SAsS3^1q`eN&U?k4cDN zPEv&(Y>XHr8r{Y`*?0NQi81{_0RD~BSjLi`R6`p8mrn%(#<~veSnpZxa?FP~Kv|}J zt7F*3=GqD-KB>Cqd0v+$&LA!RF4@!U41y?sS8dKZ4<&Kceqp>N*atJ%>$iGK7HP=W~5 zNLrolEM4wwb#4LW%KyKgv~JYzy)8#Ns~**p#qEdb5~u4T1o=K-U%F!332QQvxm~~R zL8hW~Jpv{AC&(gP{iQPyDdj@BR%LlE;^iSURHUwBd52apugzU*eVw{RUm7LS9{_9J z@Yb#7K$dilX3S=il0}oebG&wE+HOehsoZrNPMtxQgMQjp89kioCsUU`JT#e&FJb%! z0WGJF%=E*Y>uhC^wW-VKQPU9oz?zJlPhI6bsyHRQsU5yZEw-CLU;nZXd6d$7=FgRq z(;QS^_TyNks;Iblj`PjymvxLu)Tv=y*V)cj{J-sl{68Z|XCqs~EgQnS6BALz&~*&h z5JNgO`Y&R7x)!8l#S~uJ*O||7Cy}LAQK|lF^Ig7PjtQzaIP{syzZ>k@-rgR0@dp=% z=226;^Ov9URAqJbvq%R1hxU#F)L^G1YY?Y}E2yqAl6mk;`m4lRniA`aKP zj*GVG`(Tv->V1GccVYf?_0PKN)#;%Z8!A0rHt=$%Z7D?c{KWq9gT0)oSh;?~63pV= z8s>INI6!~=wg6lo`g|ipupJ#Rn>us9CrezJE%f_l^e_2zX8LPE?0h)~(pciFC>Iug z{k~V|A@;;KTz2Y-Xj5-3Jjcqp*He^*uu=Vy%?Sg3U_&i;xQ+ zQ{87da4R}7K#J3vA(Mv-{ z`|#u^(U3b*>)&Y&sk6&Yb(4VQPSMH_oKnBNUz@e{)T=`Ht^T@bQ-Yf*;IX{M+LGY# znJBfBNdX#zk~KS1#>-WrgJv4Oh+5=-cK2rGvFPjlo?!J^-`an zt9#+hY9+>0`b^ln%C`^krg6I?fXnOXZ4mzT^;Z=)#>y}$ePq1}$5Uv+wd7yPiBLo9 zW|Y!H9|8}_TU42*H#~u1jQxg&ah(T+>=Qfj%5aBJ1KOky){>dTPiotwcJt{honi>%#KIpf@N`ny{Leq7k6?`c6YJJ={E6V z&bJ@dmzmndUMa-8{D&<(j_Fx?Zw+J&w_-wJYMu9hE}HxHsc1-C4*zaQ=iqe`ms7!i zE)~@H6uv;QTb)_@lbG;&gWK70WM>Vp zHhc-FnnUb4H{exn# z%RNlF_Fkf~oS8S(0Qc;35`rf47)L28L_UNbBEGXu2r?{!g_A~XfNvY86-dVDe?aLr zk6JV^CT-rC3N9Q`8b5FfhzSoDeR?)RDqgVd`2>7@{0OWz4rgX&fS;3tCkkJk+h6t+ z^#HOMspiP{Ao-gl>jGffOCi#|T8h2u8gM!U-ZP6np^N+Pa{jObwPkHf-57ADr>C-^ z1Q%T*CwmTg;ZFn~jX8+}B17p~R4Mv#5PF>t-!6J9by7Mvf&U-cRb9YIs;VxKUiG_Q z?DWp&WuYLbZXf4rP735xAk$GA`JNgNZpAKGiBs2`D8`+IbVNes7%b9DtNH)a!WxQR z+b96P=OoyMmYmihUJ21uRa8dZJU@DN?U0oZ=5w3cou!BhqS0)o z(kceEyu(qov^!*McavWY^qYYE@iV`^oBi2!=(TzfAa(OC z`?FzqZeRYEQ_nj&r6m1s`Kwe6AiFg$^a@Gf)fgL3ZgPwasoVM&8oYmVegbao^Zdj#~y7 zB>dNBt5THg^D}fvXCG{QF+~nOi^SFyIb&9kRPxS@+6H1KRr`-pKlkVBlr`TA{MwO# zW?E3RuJVN6eYTxvwYC`PX+5V=WmVS>j&~C=nPz7HQy#-)K&TWq zp*u)|9g=M-4`G!0FY@^L%(c`Cv{)~tIz9l`ABvyxpFsVnrVf^k`DkIP=c<3ak%4id zqXnmO&`y+dD%|`Pb+23>l00#b=oGyU^QIrl^{e08#S#``TIRivACid3n|u*_K{>dP z*tYpwL#4;6<3Nn0>HyplsdPE3_RP*yz_jSo;orYTK;H>430Qk8bJh4uk?wI+c;t+% z&u`wczi>4NyjYyNRu)vE0%A;#vpo;LRtF^21*stM#ky(BC_E7VhnRT2`&Y$hVwkc$1y)suN&|W)a zSX^&DT6{TUkIcd2&G&j@yH$Js9X7;9cg`;^c0EWqUW#?oJK3W_eL%|Y+@TL1f+t;H&8ZZ>1WdacGnp@wpz-fxgdhfrtH z=sEyL(@MzZTfyCF@GL4qO431ys0BmGU>8N%uQPWu3!pO$l|C>hm$yKId zpD#RSK|Bf$jgfFRRjw)C&+h6fZwPm5BktRarwHUdx?TDc_UMQwamd4j!JQ5NT*(m1 zW+##0VJ!zR)Tizb`R8T!btB2ClD}0MQ_#tsRswE zQVjE99Yq3W0fU1HN$VNWoh+7BrV$;J5l)~|k)Hn*@oak1lR~*r(C)XcumjZ+Z9wbb 
z5UsPfMqPp|e69_iwH@VdM;o-r1cCBo-T-9|?K7X2RzBAkGGqdQbDY=HWM!mfNs(ZB zkSC;ujFBNPRadkU$kK_yFMSNwR;Mu}w(;&=Ot~2*97N~?GIQ&+VXf!KVL>?rCJh{w{E6N10Ui@h6?dQdhmuQs&e1l*aL&T3tH@AQ6`dj|JyH{N_oOmof=%Ix5xf?;#ugKJ7y;bjEuHQ!Zzu6%- zh$n04Ys&|ptqYeKn^1ci14>8^B5!4N6_WI-1vKU&Xu?C+4>BUL>~(QWvy>y9(vp6G zJL^Zob22I{Lw|)ADTaoQ_k@xCW;5g|zSZ}8@*(f`F#vYHT&BNVHoW|4NWC+%hVej! zOCpN**8x6X_bH9squHgaL^YRnAc)US9FI9_Q9El)?qumu zaqLO3*q=XZk3&|ry@Nvnih^Pe)?L|?D{c%-Meg-X1uw*e>U#*UdZ1G0KTqo$3Zzd* zaVt$mZFWsD&7|>%iBE=mqLLZL`em)NQxfe(uVs&=d5Y%LM6)q?SvcInQc=xp%3~%Z z9A>2W!I1k-WK2?;=WPP(xPA<*y*@0A{9b-|}w z%h87V{FSn|oqlmj@3U1?Q;GK}H3(Qwlghd8u=`wb;tXZ`&Ahd$Xfe>_Sn0i2e=>OsIF6OaSbXDB#5f07)J2`bQ6 z{pS1Q-qKC9>qn8ety03XCv|aet$}|mdx_j@joa5bgc+nkf%;=VU;T1?1q+VPb`*l& z;BLgwfEeFn*3jmo!Fo6L&aDxm)wGsxt>kFlWoLX|pnbyfqV3&1smR{T`^# zH*%cWY6meO0Y8WlTQ{~OVTc8pv?LfUsq1$GG3JjzX zhOv^ks)}IewzMJ(zTU`ZV`C|VuAsk1QzFwvrAT7P+3d^=4z~lG9OqL#=QY3$U^(_G z_TRxl`oP-SiDb-vdO+TgoMi6*QwFmh%l=A#qpyapHtl2oEw-rrOT5w`Bq=sL5kUW^ zvmXBMwc2IuUZrExlO3@J@vTtQV?$)zHOwNKb8_UX1l>F;e>PqB# zcfvk?}$?fy!#3xacXA zV>K9^A8)ia1BOep=`d0vno!F)eACU~lKb32b<7MFh_F-^j3&sb{&0}mn+Bqz@!|&c zSjLO4V+PThvK1-XL@_=m0u}(B9c}^V@}KQL#tK457KTOr6PX^fwjhThVG??z?cEoq z=f_)-~>EFgLk4W3+ND~ecJ0T|!f3jbC*Ck?qb?YICZ4^kP(~b9>ZmzFaczg$c zL8fySRf(D+)cHu@S6oBV+ zkOEUuf$mRMRl+q5`AyzYtBcH#&tj)MxyYTCIaiT;i5I+sp`UtwjAe4@$XH&$X0qi- z85zOF2BJ4oF{j_P$P5q+V^B?5QEN$dWleG@_iax?)AYehgirAdmu{6EAVpp6j%vEp zUr~{fX$RFSOgzozDy8Sw?`!fnf5_V%8xwO0ye;Ej^V){@7CK? zdn!zr&z_SO;M%wN*~)ScPAy*D(T9Qu{J(q!WwW_WWJsSvcgJ0WrGC zqe(plt7Ywao)_pkIGFNnurI*&9xGvIJ&ZD&%X#|@yW>PH;U?6WvmUN1f3xjLLxsnu zN$M1{%)+s)#|qqIgoVlt~OhSULja{+V|Y{dBBFLnH;Gg&F9yt_otnuGX<=fta~i)1>*Zwl;Ea(W6+`H z?#t81|7VUgRr_RgUj0xeP8hcrJ z<=Scui#h*G%u?&_TsWU!mn>3=Bqh@Kw5es1*en|A{Sx+^h~KdbKF6l{?ZJHkVSj%$ zY(yD$8yW4I|FkaDAYo1u@tu%hwBpt6XIL@uDU4m4D9zxJr+M#UAAp1|7iO znjD>703Er&^Z;CMfs+u0?S2v5ih=ExY*iq20x$UGD@*iJHq-oE0ek~{vP4I>M-5=~ zkfTijpP?VKl>%}T*qH*8F&*l!w9R9OK8%iq@!)4iiH?~*ExhaXTi8x##EOB(@9m?Z zJ~27e=Is*)V&2J$Y6md)aeY{L=j$lY#>X>K>HlGCTXB`P=`}<75 zDDh+!?D$Nz14^M6 zSBfKy_J9)f|1^;zT$EG`l@4NCja2(r^s|}pLpleAtfe^t9E@-FP}1e!j@sG^!i&x( z*3cCE3_Hh>PJE;>Z@kBSF13iWQjuJ@YOd3m>f|Z8QWxNHBkh0mUG+x>aJ`OmBiV78 z?sn+RwbyUaoxR`79s7yS?7IU|kLOScDq?~bt-nW_lquCq`Pq>$Atz9h1%GQ2 z`&jy}2(b2Seex7hm&z7jeS?f~afpRsCQ85+YubJjAEbaQvfPrZH4?%#*!QR^D|-Leeg4(XBYd0p^M>{f8g9UzzkDU`=^ih%KX`i-{rdBQsiD@#g5wL z-xl1Z-`kp&gHe%!g)*ulf%nI%SaE$*O&1pC$%P|o5byb5XtHD8AhR?5gQ0L=%}BH> zg;9KkN84HAvFcj&z6OitGa(_ttuyndNAYCxl#)Pnx60M@6*u7D*Oj~%f^^}eJZcawr7@reVWKSi*GrQtNwI#P_kb(UE zwDHc^+f#DSpgTh#P=&BL2uUlK3%1pJaRVS9C%7(lg2~fnSq9C>$P5{!4_-=^yGzW3 zlYv}mXzJ-I#f(g&wv+%FZm}F0I0CfvWvD&~`*jBhYzRO&>4B-b~am>`IR5Qb@#$n5sUoqwTR&nq0{8+$-F13 zg1G-p(jw);C<}`z|GCYDNyX6h*#aJwoq)bE2VZtNV91yLX;S=C(E9QE+}7dzydAO7 zjg&u;qFA8F{{jA&YNw#H4)gCo>uoSQCYa+U~%%^hOF%@97QaTBQzLj?vVH48^FY_HaVee=H7Cwcf zJzFql_&**Ch7Y2Cd3>p~z7RsMZed{oL};{L5NYD4^9!7`5r^HgyM?G}%CSVtNfWv9 zkrn|eDH*VKR;my#sD1`-PO%&GsJzT1%^LC8pL#LU{#N$kvd#1E;JNX3sn=gbr)zUG zBK&43K-sq1ws;)QpmurFpm6kd{{`OL4BZQLd9A0ELwp(iyA` zu$C9k6#am3=MKic$r&e-IxLMu=GvjlDJ`y@fI#x}C7FpBPFN{2?I;DLt;_zR^s5&3 zfdIs0-SHhTPvcyPv<1tmmjmOK`(apHTic@I!a|kk(+_)R^H55W;F5;`6B5 zC9kp<$_pC;=8M= z72(5AeGBUMpCHn$sGIyE)$hqDkArC5!EKBm_!oI5FHjJV)blFPUXI@%=>{yxyS!!4 z{fx{wL<+%=%cCmkO)2Us;9*jQ0)rJdddtl=^`48U=dr(lh3|R!%P>{5lG_4*V__kD z>fh?X&7_KtssO4d(@~a6{yPm5T7~0K+~)OiU1{3r}XiYGARF_y!iHp+!bpp z?itV_a@0RdxsGlgt4lI@w@ne7&o(NVKe1x}&PBpp&C@x> zf%D6}(6c5-jgL!#dQfOF!(9urCCA9$#R!k4g99abxSK1$rc)_E^knaOfi~Y1Bs+^g z!9kT*olM@tM0F5dL8`x%)O`{gM9lWOTG$hoyt7gqKXnoGtqQYRS1M6iTTOiB)AHg> zGpk8p^y1^LH0tn5jCzIolsAya!4(v2_&Voo_A)b=8jQ)W3{(X8qCWpjwH2i?$-DJZ 
zPxSr16ZNs?Y9vm#Rx3+@t~&r=?>K?Koq;p*M#!Y!YlS2;JCn2$K@$D2hZK|O%?`u* z7d|BPLqRF>QctC3)6aVz!<`lcs|BaK&^kW)uPJOkhzW{}M27+g2au1d zSONAIs#wT>1M6Y1-92Amb-=mjlo7qukel9TK)MGw^I7fcW(NVh;+MK)$NOB#akQW> zWtgzEVzx;cz;2IM-UAwS%BukI1FgQ^~ z?DcxF=sj*`mXX5}2f|0&v@(;gT^*)&&!~=;#gosPztoafn#C$fyK3XxQC~MWaN36G zJB$qAC4wje7;c6$ON!SM{Slc>o5LvN?bc`LI;&T4G zC|g+D2T7p9+^5w6)1~z7hsh;Id=paLC`R-G^%rl^2>AzX3#@zV?Bt6DfCXN1)we!0 zy6sa(N1AjA^4EFE7B_LK=Ar6G1%;Y5O+=S~)X8>>A>a>`B8fp`&i@5)G<1c6q$=81 zf7`3Y@)YcbUM23GZ?dUftx3j0X{%o>gml{9qgQ=qwY*ld)nISSbWO*di!19jy#(n; zlI1iVfg4i)c2s(;0EgX+Bz-BElO)8%4+oR`+Ro5=v9y9I-X}a2X4AYg1|4noyH(~J z2FcM|2{#Z8j?TbSfKq4Eet8-yGC8R`xe7mZ21{ubeO*?0E#Nu}skqiFodW@paWX%* zuF_5#RGK@=ip}y97aCwkJImT{=*ZrjeE(?2rylya!kk3`!pPO;^AMEE2W4`&T~OkQ zFdm2-2Ib&B(`t`^vUk^h(747eiw#*B>d;F$)E#RyV}0hf)|7V77LU_`MM1%w-=>~* zc^-n;IPX1HHb_hM84&n!IGk_j$4{0gPZe{g@5NDf$Lza?&$d@AdDDpj3DGez2cOiz zu~hTnc2hqS_{Ks1rpo&;Vc8*FdNQ`E2ioyXe(Wrg__w5*R8;o%(NPHJV1RB-%zn@L z(xuw+(%HZ9EAZvP)k4qJ#+5;%<$_%f~#vJk-(Es_y4r@UQ(%F@Vv=4olptqb{U8$YK9;L+x#m6->Ukj?B zt>iELz6@p6s0#$Kvs8?1U1p|SN^{#=`sYlQUuGTbZ zKfg~rml|(}MRa_XeQK<2+i=FTNw=K8x$=TKiq@2q?48YDD$AQjl9G}m&(T4gl_p;Y zPTF;!)R|9q_W1%DG<_%c*+=WTdPPEbk^Pe%hR=rs?Vhdt`Ir(Uk}(HL%HSix44Bfm zKliN-bX4e-Gdg*N0}|^n$FLGlqR3QE3;&;ee>pAFj)*^U+N3VSDUkjcn)ECbXypRx zGo6!&aury=T|c=Tlk2u3IbbW}zl#`~-o{fU3U=vfOJ)jbE#L+tz~CwB@bxFP*<;25 z2?c)bO%&!|VMVXLS^F3EH~V!=jX{p>w&wTBp3Gvjs$&&cT#qs^I?~k;S%&FfZzAvv zQzq)NNOX|7LN-i97$ThQO?7s0G(v3C_J(nh!mW#oHz+$7iar%ohQ}&ZAjKkneAsVX zc;0okk`>cP@LLZ^Q&qa|u&&YyPo(NS2``eAz&rgOFOEIzy}l<63|!E)YIGomJ|)rp z4W97_$^A>$PML0n-^T6jvK$IT@Gi6ZKA#sT?z;<=q_)){0SwqP2Y_gN zTuQIXjN?1q9+?7UX7-D)AB7#spwsT9@is^fj=9?F445SVA79;y)ZV9qMW?3I^OlBz zWq_|16W!v)I{NL|qSA*2Sh)u(b4p+J?}DZv5F#-!ojWu|W%Vf;zDUhfFsrSv(kT|ej=Gnt8+!lGLOjnXx;i9 z^RIp{hs^Aq0Y;MgXN;SQP9w*w_n=n{d~u4*PC=S9{US4e`vM*LpcYKuJ#IuaQdDhr z)f-`=tIR(^A+ANq0RNN#o=aal@~={x`ApmE8KIPqeBB?P&!GRi+h)U&b;2)L`zP9& zIAyC%H#axexj+X3&eNEl(6Ht`+(A^rt@e7;dLP1ys}?0-Z1^r!`BSCM=jwzc*}+pU zF`n6Xw6Z-@4v}}v%BZG@JCO!(rDwIi`caoEkWI@H= zI8R|{(bFQm-$48cDVWmd@MjP8@v8G`61(Gw7Be&w8e%!Ech7L=|AKt&sZrr%O&d4~ zWr6EBOHpE^sB<{U67m?^!J3RXF?_pkyO_{;kSqE#vvwETL%@pV?|ll$c3-Qeqvpbk zENX&aYY!|9-|>UUK%ht38fMTYKo!=MS`{l5*J0p*cIf)F-Knn7xliJf<*zav@!a7A zO-o-*KOdOU`8u|VWKa=+y!}bpA32`k+8b(JQUCjC`tIE z2d8~k#QgEa?rDdUQ&ur70$76VhoLK~In6pMn$UPjp7c~vU>_ROMIJ(~Fy6iwIXW^% zZr=AM2^?qmG{oF2c^ioLG8^|Z?HliE93mNCt2u5wH5sA8)2h7ogS_%-15kq%9zbApCY?rtfE?5AL%jUzK<&s|EJJ3^nfMoH933-8m1sM85s* zUT)WA1u;#ud!kI@VY$%qvJu!k4kv+y|E@K8%Z)_d<&O>zzuc~C3pvvw+d5tM?X+*0 zi(Z-=!i-gKq1>_>BzOu*Fs4bRv6-SR7_2*@dC8k1C3GozF0Cx!D>3Yc;po6>O+(S% z@tK_P8`=}Ox!GK?lI@Se2@HjQ&r9eR_R2G{{FEt3x4N0Ia%NENgDVAvz5t6vukQ*+ z@W{ho#15~&swc4yg!P6hIZ|E-Cc1A5A> zIQs3u`M=HPjqmiTr@u!VipJMMLWAns3P_Un+{u3um;SwUWCy|srvqQ5eSP;bhXj~D z9d6Ip5C|6m&x-1cU{4z6>|;*W78dNNR3;d8yTC^R8UaaGX#&*nXLv(sz7^qg zUhhrN_v+?XCRKrT_wS}-q#d%!OH`9Km`!Rt$sHUVsMgB(rKU_u>paUDK_GrGM*R&} z@!w}I+eR5YR01E1lRh1{>$kg5ctur>j>^G(Qr+XsZ%5WeoMw+js$~*knTIqo0`J#k zuDez*n{iv+0#7pBo`cvqK|Wn?08b$J5nU`}wR#Ze40)+LlokV4{l`s=CG=}&532Kf_?k7tO+Rxm1C|x|MIa-|KhFgO|0Rv-Rh2#0vg2BnsQNVuT9_;+& zh?sTN6LYx|dvRv}qb=R$oBouUfK=D9D6VG4=FrF_O~U*Fhm$6upSilK12PJdoMyZ9 zM$$x;2>=+RC}Ef4Z+WQBqBA7@Kz!z&^b6Ku8Y!@p%*S5*3w`;!d^rLNwFIB9I@-hGB$Xf6Cqa4FWnQ5M5qBkV%w9u3E{3nv2bZG{8-_Y4 zZ$da_%*d66NVa;91S!;UT5)IF9oKmnGV%<5JD4+%Ek?FrXJ-zh_nhynJGqnou1j&% z2zwQgbmlCS0=hhT)nd|qh7eUhf^SP&6{|*nm%{;=UT-XLDF(<^oXLQ_q2wj5c@I2k zq4<5DeKnxTlKQvd)vh0Qh@z3WJQQ%W*di+1Cc#%??dYmgP*IHx#M-ujb<0Opr@eGiG ziTk8fTkjY7;)QY75$YDuh^z`qkhNzgl3MBQa`Y6X?AoRiQ*fpQAf)KPQ5Arg@e}pT 
zfsaK3e=O8bXZYXi1>d=bx7ubbvB7T#Z7a_9e=roVKpcW90wi|kPi*naHG3zP+=(W7c2u3f3P&`)4@R4ai5RD> z9P?rbf3;%v=F!3ii@H!vUQjQTj{hEO`h}xxhE_Jxgk0Q^SM6jZEB0B_{hO1DV>O3P zlR&uo{G6m_Uo0MbMIW=51w`BVO;FUJOfp6vJ1%U(Gzn0p3J$0wA(TgT|2e3 zL5E}2E~ISX;e&1GI}Vu>53|(om$kc;3_?j=qtNjwuac99E+SL&jE!FzZjzO7uHueE4m@vBk;|)7$VaDc{2L59<3rc4B-<^$1bm4w^P|T7 z_hsJ4Ta0m5xMH<!xI+@6;UeF2x3yCOit z1aD@f_V1#aTd^n;YR-gFA$jS{8K}kRGM8`LkO$}B^?TgF*OHxeX(07tTkjax z=eW(v%$aNQ=n?4ssrm|1We5)yw08w2fS+1BB_<$eh<2^KoomTE?axVc0V~n@M}09! z{q(0(!9FjaRV}#vyXAOwHJ23|8}$6@2={O4#0l}HWH`cvvOg>jpftS4#M|XGTRs~v z&eAn9F}{|jvCi=-ZseN1>)R5dlAah}az=@h^=4tH3_eYV7Xg%+cupc8i)LgNZmCoT zrd@a@IH&Zs`4BM1cE>uD@p<#0yO?b)BDyAzUsg*(R46dM%K~(bf}#v%n^^<~HH#JD zaRZIXeB?jc<~-imzTk88IjU`Y3VjoTI69u51vXTSjy(jkmb5!n{Ctrlj1#Mo^W3_= zD}qCncSu9O^lweeUFmCaVAKPKMHw;cw2tIJ2=03zdK&~bOC$D2NZ6epf-;d-HL${&aFe2jd{jq@zHN@rLg|! z33dUep$GB(fRjl0{cVa!FggD>vMfPJKb5qm6wn?4&bfh=!U`4%ZXM!$7Jcw4$8w!D zM_%T#ZkLv=$12^p*ZwJ1#LR%LC?M<8S~J_WjtaWugj8=m(3`bJyc>yZkhUmd2nK_Er9Fcb7SedHv~8o#=gn1NS85Fn+1k=@BV0K z(o|B3`~Xw7Sd|_;#0vDiV|J#6r4IKxNf+P(*@u|5sDWKe*J85Z>l-}W+~vqGd0ofE zZ?ZXQU}MDlpW9uMj7z{?#wA^1s`e~=T_Tih5ANKl z18V|{q5CskpIvDBY5;H9Y0VNBcHYAO>>o-qW)sg;fTJ(uFw9XK%N3==*f%`$|fwL@YfsW^*! zHu-=KAEfVrhR_f@)#&r7f*0-oCdz7abmGC!ZxZdFsueedxneekdf5|obp+}f$(=ae zT0G`_g7DCxf!A8Xf*@X2LcP{%lU-At>7QXq#5%PRy&>Qrl zfQ*vPwZ=ud>*<;E`jrRsJ6k9r?4?%e0`dB|ee6Ya_McKs;={FCfGGsHJC}R?fYhJi z;x-l=@;ZcbHBV!!W4G~N&cw>n;+bozGJ=7pyReWFQryNEX5D)|;-hRb;*Q$E{iEWujt zZagRl!!KxYNGLq+#&}MCAnefYhja>+__p14zJowRKGtDq=D`A+i(MK)`1U&s=;C7D zB{bV7o3U!3zVsPq&>yoG!zY0NX9p%afQI85x{kj0K0^&k6&e%<5!t){4M@Jl{qbt( zQt%O(p@<12A&r&w~#S%hIdLl4frTx^BI+n2|c-rxopuQeD#xCf4oq zDZABtJqe(FexGs3$V3HF3TC;J?A~?v$s>M2BMr)s5Xt*5`58+lXxdv?KnrF-W z4%TiCJC$czU8+xbgg^k^v-9(3z``=X9il>0`{r{U(WA!djpW z;v*ndz#{lKI_&(&sQFXEVgl}GSciY}C3pC!q_*HQCvCgQA>iFsu%l2xy_m(XHy7L| z^^>-Vse)ew!2Xpg4q9%}5~cov{CB>u9QIN8Ev)pewiI8El~wyKDEXoWfhNI%0jK@n zD~ZLI8^zr*2OsZ!UyRfa#>e0JSylq2(vI4!t~4+-44SPhirAdocmYwIEk{p{z!4gvvW1fl}~F!OCU0;XuZl0ONV zpa=Mm@qA8-e`f*HRjxvR(j$b#?!*aGzKQPkVo%@EXz%u1X+0d zJQ)g*qM}xQ3QPBtx<%=gc!nG4(@UQ`ok5)GHm? 
zkO#0fYim~BuHhJzPH2{n8UB8q8cF?LNdFf{FZ+G#=}sJK^>^<_8_4cMovCA$!?4Cr z5$Er3?Aniqo!d=hXLfwAqo-TlcS0W z5Bh?WA7m5W3gewkfgQIW2;KS61gZA`o9NkEvEFtvLG+o6W{EnjLaIF`UVY(7jJu!V ztpB3$xSn5FT*6s<3~QDi^fk#6^%wu$mz_g8lLF}3Ex$`XgMOa>*dWTvRU4s$Fw%9L zuuU>Ejh|0lR?>zvmuJ@khh{E#k4r4`VmKLvp(Fmv;MUA4OA-^yaaN-dsmEYEytIB~ z74k8^o@9^kq7{YuX;N#k*FEi*OL`?};WL$D%u*b)nI96Ul*}b3ur(^#JuC)CEXAzi z`%>uBL9-_(?Lofm$qTN^1$eg>D|1H|B1Sa>(rhJeAGT{H1yh5uoBpV8o7#Bx>(ndd z`l1El@c#M-%X>V9MJFAcL%mi-kypQZ`@mJ6-XTU>(z78T$--zSRYeO_C3HWlDl z@x#7t(7f?k!poNLS%i&eJWizOh2x8|LYYS4MnDErwpTQ23rOAmJjasYo@px>WfF+c zp%!H$^AUs$A#^p({)cV2B_#n=z?OuuE$e*fC}-R7D>$>N!>vh~o|Cm^$kAIw7rX8`1V^@RVd z)WIX|g*@t-xRO+LYw&j{xjRfbvg{QBYXJRpvfh>qrq9ZhF%!LJ^gpJ~`<>1A5Bs60 zDm6;QXo*b~p+)RjdsCYddlof5HCsi@s=cY2v8DDdVh6Rws=d|TJooqc<$3;qly7*g*YFwDaMWYeV6w1(J z2R$?#&=c9@>>9-igwn1~mKpQ5{#*ShqQR**WYuEWdV7`*upXZH+sy(IpF3*~|9xsq z88RKqkQBR2^SfLx+o5Lv25hwdjxyyPF5G&X`|eGkBFDB$y)eI=qw&(<5zIk0?Ff<4eegX$^NtyZOkKi zZnx>vr;E*=?n0jP0>fgb%O1n6hiE$0f0&l*Q>%a#bQ!PT)=MN95QdC%C(ZD!WzdXV zb46+Cl@|X_!Py*zfutDT##Oo3mYXVb#&n3-bl&nI@(LW#&?woH)>^c)SCbpPEX8At z5ic^rawdp~oSZ9ya*pvc5!uh;(r}m(A$m52={scI0^Gdj27Y4W5lg1y0BD5!Iz-uG zd&ms%zJl;Tw`38BOP5WpU*(RBzZP(mpDE@qnmIkLX5&;>QATvN%QyA~ais--fdnAs z7}bt~q#?%1 zDR6YP7l9CXy4;y~dI>nh0-VIA!|%a(dLG0cRxdL?CB z>5X$bSY$vs@lvYNmv>$KZn#J1Puu73r?QLZ{)PwFF}?6|AXE7V+O5w2Z$qKVS!}N0 zo|T~ZZs$xb14<5VGBf=A#fx|270G|k_ON>S-(Hmd_b!oSinU0MJe-8O(0#+Ck5C07 zw8of|M8XlFh@q98RAC+jQ2bR9XD~Gg&>R6y2LcDa-f{@^v=%i+c5TxLUwG`H`i8) zuZL%4>L;ls!6x-iGs z(o;+NRl6v|`jE2H4!pD;H@qRl_ABE7(1>TIx92$c=et%zRa;1(LybdW7%)xnP_>$d z>8vVqGdmFA$mXGOq<$z&p1%GHvB9_Ld~fhd$8?H_pZGs8L6@EBM5VWHan6tGnAB^s zcLO!3tTMV#=*v9lYKDjUBzS|KjyW#Qvcu@94BH;7A)DhKauLGhrbGCONhIdh9Tnv^ z=Pbw;FYk^!NoU2(6qQ(&6FXhsSe^-Fm8n`tWRuF8xYi9>UELfHF9u$0-t3m{^dOy) z$bMLMhL};s6u@r{+ysvaKY@N!Q0PJR4Fz{c*JcvSE|S-*b?_Qk+6;Ooj7I8cbe~KGk>lu`@SyFIy!Uhp}TX-cZAuR{z1_dewU8( zl>-zS>gp0_RmmA^D3mXDs6dzT3vzXrB)j4rZ}Ww_&&R`f$;)`#je(y(e9-a&j zCB~(CtW(+PGwBt^uZeJdJ?au`Z0)(m{(Q3={clEG+w_T=6*^e{n%DgF_NLvZ6+0~$ zoW|d}?JQc~Mbb^+WyL${_&%qa1>#PFBwA3t?py-BC7>D2rt*T&zc9P*Vn<(}Pnq9t zw`jEb8Ken=av4FYco=^2d`jKNFoSE0WDj=?uC*lr>vxmgO=bU71X*tNzN)NUwivugURNqb_Oug zWXk2MYG1I@5U>tUi$f-1dCk8MgQSiEw0N+D^;XKjDaL5`j9gFftyce9ric8L5+ghL zaBUHj71ijV0b`7>D{K4jHS9w=5VQ-ABZaCRJb)gw5wL9+sdZvY)Zg=nk`pO@saLa` zacp7Ys5(izp}K;kWX-pr?^&|j>~WBQQbpDeYJ@ z)HoljT{{^ArCzpa{aCl!vfyeX2=WW5oPN-H_6vt&a8$b?kyw80P1p3PD6aEt1euJY zHAC8GxfDX4p%^&OYBDmVj>1;B>1-dH@Bi%#V|18SXB*4#_;FvDfQL@dCS1Av&->FS z5lN%G1T?sOv01ADmRgmm8ob{5v*BE@=kR?i$BV7C=h#~7{~5jh$3*rm zX0CPE<6{qy&(&68M>QD`Yq;_>`=TrRZY=PsFI{7B<9NtHJbM_fE^nlwJDfaVo8{+V z!uwa-;$-*lPo&_2o?AuVuI49zZQ}W7^_7slWi&$=Seu7xz8njg*;);Xo_D&MSkK$L zu1)=P5UIZX!c*n~d_p0scs=>DZDV<@giFbaO;EKRsr5Sk2j5#gD$TcZ>0gtOS#Q!$ zx$^Op6#>+c%Vr(cg+CR+!a@HWKlb;glul~(FDk!xgb)g&p6-8eDt&wdm&^w_!cnb< zuQQD-6jM2kZ7H<+XY|X9GotAD?RXEa*1EUZ1CNINch@vN0oZ}{c@ElmVdIIRlilUq z^0eT~9|B3eMe9H4LS2BF#^k@-)64lL??se2>d_%AfO) zeVkMWZU93Qi%let(IYRh8v>a4_;!c}utO-;wum#Te%;d^rH z$(rxE2G9~!JYUqD*mH42Na4Ts4fe0|aAH$ydH%_9Fcz1ZCYT#FV>ye-I5eFS_xv|r zQqeX!IaK2qmhxW2nXk~*f5$l81^5;NuSxHh^joiL;!%34?p{@DhV1e7?`9sL+}i%W zu;f_hf!V?#`9F&H26*QkxC)_NZiAdAxttGz`e08}zrf*VGyZDF zKa`4s`szQ&k?%E>SFshgSI6aM>K?3=idBxE7AenQ=IO6#AN2jBg^}N+C8t#6UHgIP zHFAvjAWyHO>_d|qPDrVRN3JR5-Vb~Ftaa~2Buc|oTY1g-1%t1R8MHO@T*5j@_v^B< zE)RBtA~+MUoZ~k0SUp;8X>=Vu=P-}PHJNqYa}^`!%=984hT?J-kGHz1hDZs19EGv< zdah`F?&YIls+|0E5dDK`P7%$YCmcF9Hb!<`aF%qzg)zlfyjc4)cRrE7cs^_1cTp7C zc5tDWczw8&?Am-`KY=lC@)`p1zbCZ#^BQ8Mx7S#<>O@$m_B=5nF&Z%bT>q?POY98p zPCsvJz-DNWmWO1#-gi^=fe}U1k$Yf1a31Kt--Et=>y%c&t?FAU`klxM2USS1mrSfe 
[base85-encoded git binary patch payload omitted — binary file contents, not human-readable]

3YCz-9L0Y*uHACS7dBo zWCQ>mr+onMOG|5P42#{rpEEdu0^F z1Rx8hAfp;Bsce_nmem%=GM6+=)X42X{Goa2FzP{8DCWJjT}DZ!ge}G0EUQ!;?&g@~ z!d86ybKM z``*P5oqr!au32pNt>(RsJ8JGcnP}U+y0dsSEw?Xq)xCHv7ld!V8Xh_D1h;k$4GEsr zD=(_PTwqthoa7*@Z&isIQAIbO5d=~CL3eV2O*`3ivONosLGRG;?jPCzXU=9#>sZ)B{Zq#Bs6gnHUjq$RLTbY zwcDMRH)2$Mf9euZw?Zk={5L8UmH7QwqsD@d>p6oipG1;(bA^iXqGKW=2CBaD*Mazl zXrss>K1ARLk~ulV(4UFNtlNQkEgt?RllF06x>!$up^TcK`p8y z#htkC7I0-No-YS|2|n1nVp)Yf9|zL$>d!=*I`FjHg(3gQs@-0^Z&pM}y^moYu8$30 zcthv1cy4#OvNkXC81%J0FegSWt*v)QWJpBBcu(+``^p@f4KK~~G@|+KO1LM9wUI~u^9DZ?y*T|fA>C|($v#x-3leZn_LcOd_ake zI{#KkMc`5BL1;`$Jbv(>F&P8HABITsqNq0OMq?|mpqYL^0x4RDB$?FUXmPM-Fdsr{ zBY&&=-K~6Y?9$mI^|>7t3WmMTja|u`zi7}Rng#emf!FCH{|!ciCm%IFK)krPcAry&LZ8xs)>Bb%|yC}L!K5&DRXSu(918M5kgy)1YAes_D;9U33q%U$x*%Y3@Q-Ho!k z)Uk^9sU6%UyJWL9_PA~9V zJ+QZiC+6uLDsOyB6&E*+Tj*|PfsU2UF9HI zTG_G=fsP=p$5Hj9-FA41ZP(_CuMPe|F=e~w!7Z!&Dlhb<11BGZKsi<-299aKtb?%W z!KUiXHT+G3qKN)d~er3#-&*w z;7`+7hsi({ll&2OU3uvhYopuVTE(UUX>n`Y+q5i>ttEyd-2Y;^A3!1=#7}X(ds?B~ z_2=-S!RQFcK^w|>&nK<&#-m7Vw{f@iY&9Vmv*Rc{IZms+;<^Cl} z)zOLRki2edGmqN489cTNIL1!1Yo1@s)Lfs{5Htu^MKW3K80}ce$F_`-dwNkjFgyF2B7Gw!&OMHd-!tNRbmWP_Q#!xO-aQh^~9q8EN1S z4>xd@7bBMERo8zx6%>}T%5=}M`9{yXK~w|6Tq5=rC%Ovg7y~7i%avuq^pEQEwRE}D zwcP;Epa37gzpsOWt*Vp_c6&EA{H6s<9J7d&pr9^|jcUgIg_$XK!n$A6FEVFqt&iIb zamZ>CM0RoOEkf|jmC|5p=<-mfpUE!(eF zznE9;xcWf$X8WCee58ig#3Hy%pr=+55-NOw%`GpKzC4271eM#|wrJ3F|J^~m049lH zwo)x-sIOJy7J-Zi@P;Xk%w$qUR`iBI^JIg3nmGqE0LyzP?c>d-o|(AU0`R^J105f= zxc(k4SE+Lu;!%IAM8vq1jtRMMP8{=4XVd4GLe>$;c-#sUS zX@lm4*Ymu;WseHc<8Kllx6L0TEUzY_>9vigG;#5uh1O{L`<*8jk&T^($Ml@ChXuB2 zmm)@+<*MtjVuK%MreSHk{e_d?;vtaC;wV~SLucD4e+Y@=UF}etdwy<}^xPbjaVO>B{YC$>(B3Ixi#IxOv6 zvE--n|Fye#eRa}xvT}C3?%iU-CZ>7kPZAwSPYc^>vc^lfJi%y3+wbVc5{JPreIWE! zZ^}o`n5e&>5qrD+BPuw<@B0$|)5)8!E*hTBfZ6MOUZ+qrc!uoX7R$nUliTSl*J{ha ztAo7f(y(mI^A|clVE-vG(IEeOZ#*Fxlx=SjwYEKa__t$oAe~-oinek1=$<7?3Z|?i z6Y_V}SuW%TXry<3J9xe-80U83b$xgfsH3+$-mkt@u#J;3#=qkhN#o$)=mhVdT$%I#L`ip>#9j_f)iogyD~xE z-m}4Lo7cnh7h?;)dnQEucoVP-(6^aPvJrE5pMZM9Z+<{g7pEd1UrPNFd2On4h-@8# z?>l5>GzLsf^^b2^qI1v7tAR`rx3X#97F zX0y{@f!sYk6)>qV5>S!{X7u1~(XnaU?^Y|6ck`?Hzm>b^sXu@*1ur-EAg8M#6;$>I z3J4Pg_ZtAq^0Ir`!NhEoxllSt=lN|fm70$Nt#X?q2Z5H%WV=CFa&ROZ?#T?H>3*=1 zl=YAw5_0r7SOVxUdc(fhlyhQH>>w5;07^|(CZioam}{DU9DJ5{*;#X*o?#pEt6B32 zC8g>~d<+`Qr-xdxL;l9^eVU&(kJKIxq_$hwXQMUZsSWT0L}LmbEW*2^UXe z8hkp{*5ra$UTg|FntcU35uxe%Vpg3#|5s!y14b6&b=2cM^--KT(khW)Gk^kImG?Sg z``$eWQGu5_QiK8)3XiM~>d4_ZmN8*r&g$E4P52(3eut{^&B*$uE3!)HXY{2$N$9u{ z!YYf7a#9qfzp|Nqcil0_-#Spe6KORmG&Y+uB{{z%VzXXnwl8yY(KRZ)xvh!@XTNx6 zfb-(K{-x6O&lAX69i90s<&7V;_VfF)<@sJX_qpPawHZ>-j;pJ=2-<*wu*_g~PTRGf z?aAgHEm(YNGjBXCh9kMMc(c{P1q)HNqT_PH5qE5k} zCY1Dm^lS+Zm-p17`h#LIpvXKhQsQji&w~1&SCt~P+&XthD{p}qE;i1rvzlG-8ZR?u zK=40il*_BDMZ4!Prmyx@KhqbaiWxx46?DBOro~A(iT8tm=U;&Nval+8$wa7vU-91Y z9T0~~_X+ua2m7ZZ)@cNZUSo!-Gs0Te)=qJU_f&@ot^=hS`}2p){fkBEZOhJfFFG|{ ziK18~yS7DI!p^qvxAc(1iJM*PPi%hyI{sHqIMhBD!9LI{K?_l^5l;JvJg$Bbufj;r zicn>WOUocqFv~V*Hs@!E;qmx=TwYX^YuDe3;3MJA^S?;9`O#YzA%%tm&@n7P;o2KP z`=NC1!LFdC!%<1%oA~UKjZvvt$N0#Dwa9=ou|@OUPkYP80iDN)xw$t>GxeK9VK=vC zOJDbe9EsqWy!^)G!=1DB)wl3W)&+o^4xOq@HOVyaSK7-p2ZV2&S=#X0kdxz9tp5FV zM=34G`=8X|1;srh8fDB??0T>D^riKf{?U=b|9fS?*v-JMP?1;wQd>4q%ZjAsMz4+_ zm^v z>Ps9=2puPU#lZVKd~k5--G<-R(lW$fdUsfG(tT!LPtB9@A7OHGi9eYa(QwQa9vdTB z8lBGLpo{k4xYdw4M=>xm^^u#=3Z#$!FcMaTh3a}?jqUk;HV$oIng`IWbaS_=%lVLh zC)WwPm(IpB3|Z(LI|m?JbN{@Pc-{rPSQ0|6{`P-X`LN&zAYbSkeJ~b;L}=p>P~|e@ zq?u?`?3nfE(Vte=`k9~a-Dq7Z+Y|Xu%Uynq&SRgcIUX;#HJk2s@RyUaNMd4Rcwf!G zaH-_Jo&Gmg&p2Qfq~|&3rr*B?wFk(`RG21{ly)7b4euY6atl$ujFl`-C+227A* zQ!vh~YR6XCI^eIh0)bu(X10w(spmbPY9;+YwgZikm{g5wjRLUfUyuZXZ>#Lgb`z1_ 
zkkmJ*+3&~El&_&9e&^4+KNCBbghSlk{I(13-|+A-i*Rlp%ChXQlNZo5cl7)3#Qgkd zd&YoAgHSwSFfiif8ft=F9m`G)vL`;q-rrwpDcY<^zw`|9_V;X_@h5u57BlKAn^uj% zB9~~m%eaz#B>N>eB;92EIfysVri;yAC8@-8m$+Ig}e zSHpR2&pDQQId>MXG3V)VN1NKYs2t0{)h9r{J-JI{)6H?g9!xC(3IeEN3dvM$H%QhVMV*%x-Yk8Na+OmEZc0`e( zcD!aq|6k{>bwUgV;;9C^Eq?wQS%^9qIq2_Ko+pZAta+XShDzPQ{lBNDh=-(Lklw@A zc~?O6c&M(F;KOIn)QIJeW5j;<;=uoW88HaeE^zr~%7p@gWg3F)d1|z@&G9_>QgVO0 zv$uEV)5qxXBp`)0c~-;WxNUbMVXbDuFYkn>M&f;#eFg=nu^>=d74Oku@ya(+v}r0U z%)P#)TvHfaPI~3yFb?d_~J9#@S(+qWJRJ#}E^-DgYMyT1( z6I!cWqLMcWTGQc}5I?}B!1IucEGa9|Oa%cWR#Rg@Wvqp2{S*p!NF5BK#xDLIq#O;f zq#J9zO9s2g0?)G?uLw+@{2^jfRm6n~_N^~*Yr@%8qWq$Rj^z38hprTMHOE@y;_ z2ghcN_uQ2Ix3>5%p=&o}PwwUPd-o=UyI^zCQ}$47_xfyLgv(+MO2|;8QPST9=j;#>6w`Gp&h? zT1^^Z>H4P(shwJwGh>V6_BG@d6n%-4z-CJTU_jD?Za)K2-UR0_PFPG*rfzHXj5c}E zTg?ZNC+8ijKbqd@H<7Hf(*7tk--_xL;V_$~{4Fz9;9^kjg7;{9szj0C0i_9lfFsKz za3F*P94b%dy))WRY|JM=zYWr+$Utf#z)81-g|TY)saYac-T*+}i)ARW=-UB`>?anv zPm0UPfX5wQntY-^FSyms`ick^wSx;N$3%pw_gH^SYQoRsP9P?=6neSuxqLswK;*H7 zt3c-2&wz)Zhu_JgM4x;-_*HlVUp=e*{r2o_@v|kxX?h@9t=i4aje2uH?rOiZ*dD~C2NQn=0%s>aCA*+uOZDCc0L*xk|0eX5@k5ic9zPg;vb zd2h0oHn}LNV?~Qw#PF&v_$K*$7gOr}WHk9y`Z51exwuU+rSW3{oPk=b zOZ&yFrO(wu{g|rWe~5dL4DF7mR2hv27T>l8UN8kYNV_E81E*Xf4~%$xez~^Rdl@WDTGMfS2EG61X7l_T zi*{cRAD>dRCXRJ_)Q>cD_#jUEdeAcHU^6)H06NNJl!JK}&(pKLv(!N?3knU>I(QrG zovVL4yxV`0G(U==fg@^rs1QaY4Pqr)Wr%~`4o_|$k9*tG=gOQ(0}9T?TGxJx3q67Y z|1DbXY!5+9U%VJy;)2kT~}3b)&U;p5@^WQV=4zp-gK77=`~#+ z3=mmD76JNCyS`qo1pgA`^+n)apRT|BvY#OA;S=on=i+aDUWoMC;T^eC{Vsy#x#p|g zj+FoCI>~bgsObt^N!8YW_JT!>nc_%_e5+X?XMX2+XJ_TF=U2Q2;o{7>=6Yo8V*cs^ zKdA7@n48z+Nf^e&t{_&MPQA;U}dzfSuhc&6NDkyGg%Exr4^ae2K~ zI$-bj*;?FK$g+#QW=>nRmaycQBpTsx_vvUluq%4}Qp>zmTo_a0_&t*Z1Xv-a{C7JV zwcxb(XqBGER;QU^VFnG-J%Dk7E>X|Mw57=y=eMD z+;pjE(0Y2;xUf*Rx9Di?%yxCPs{!ph%M;LRn%_?I#M96WPRm>(N7A9#r zH<2Tm6aJ9!n3kBQCN7z9{G7xn;U>LXbN2YxNB>H1C8~POIz1 zI8p&dOZ4M%FwhoNbrPA;t;Ek)IF^+EtDu4s#WJG zL+VRdLFLf$$JqFs4ow}L~>jAFM(bbyYu@ciqI50 z25e@f-r*8RuUh@BAcxUqw8;&s1-`-gfylNO%N6Nk>S~Ye?St@wKvo;ZL{lhzxZk?F zn1k*nHd3dhh#%5S@uDSFW@F-7io+WcHYFC*SoB?G_3lqtixc_l6hpp{q;uJ1hv{d| zE)LR&AZ(?X{HgR+pU>6g?&&a3F@FPc;nmorQ{$QG;z{$>mgQ+&Wiryj2$*p;Q|gai ze)23zJ?r$DO4E~sTO)f@jSmX2EH(d9<)10`C}$`nr+D@@fp&r9BGx zE`qKv@`4AXf+|F$x(8tQhuJD|wgbu_Ri-N12xX;{Xy}1w(7vo2S%LdY=8fftQ=LK<-|iPF3c)O)*f8N4xASfohJ;&sS0UxfQh+liwyy*#~sNQa<=2|9Q!I zch-ah(RAXg=|m06ZlhhryIDpbq#?xs#V0cnEqB68xRU(o=izFFeLtpj~WzU!X_DPe4_V{rOjK4*W6qvF^L5>$Ma=JG4-F63S z7X5;A$j&GE3(iLfel6}|nf16k(pm%w@3;eC-(m;voMTN+hTXIY=< zI=y^+PCDt^_i4MXHtCw1xnE{V+k=N9uu5*ulMT5g3w2<&$b5qSWEX0-vXL*Grmfu!6i;f zxvxBmL5~jib}u0gW-jY{SU%Q%@NT$&ayVTxz|+%zf97PNj_yp^NC2ObG6{X8lwl_m zbnz&4zGWAS)nWka78i|57~{YAta5^eyVx^VVGk5>`;E^eezSJ zogp)Vgki#%*xrR-NV-LH6F!RMC@nac@*@nIklH(T{Kl+|<%D;!Tc#W-V+ykj$sVf(>@ND2)e?0Azbwmg-O zN09s?*nO%bwXMYal{{MpcX1DSODNU63s7$2;lqk@NxJatsGXwg1+LVWhj#VNoqksf z`s9S5`j+4p693b`%9_l;9-3v%)MtVKGL6?g{xCm}S#Xg`phw1XWq4LAW|lY_MKF+f zG`HY)n_F!3Z-~ZoLq7L;HC53#v^O7_{xXVwM$^k}FF!!F5TMRVya?zd;IC&b|2gxq zD0wcJZt8sxTGT$6=e#)Wwr>9kG{+Kd>6MsmIx;EcQV(5V; z0+#q^K_vN}@7HAi+y)a+6|7{sALPv9MWzS7FbYLhHIC?bR=)&I3hGozn@$vT`1;Poj7u{bDKc8nlj#K_u+lK*^mGD%0AuR-ku;sd!U3dc18%rM%Xyh6M-I* ziO(tkR}_7DwYBwz^%&i1k;k>-+6E7;-fTZHVgPGvYiG8&nb@WE^opP?bKgj|jo(GY zuYKOCew~2jV?_y}Xo_DkiDG!ov4ehd$+d_Prwk z$pco_o1mVSKC8g<>o>yJlV+ixkg=gLt7p}sn8V9LkiaWvdkrjK>0D#g$qsM6t=;;h z6M%pP{@Mk-=6J-Z;Ls-NJ1sC&6}*yny{gi=a#sr9D?9pO@nVl0 z=zjOZ8oz(D$n%NJx4G*{fUHjie{5&ndUE|3cCs?69zi=HRP<8N7|xgIa^+uqIi_s}mYm-obtZoGA~ z?|Hy@783!W3;@<}bKwMYe9S^@F+zN`u@^*sayJH#kS_%7+D>=ORB7R^&Z|pwCwSR>mj+ejv4yqR|VUhzT%K>z{#J!7__@_ zw8fH>P=C=Qe6bQ&Di;vEa$lxhXR>+mN&)*r5}N4mx#Bb*a=Nr~WA}Oq2qhZ3o*R2w 
zuz7KIEYIvhI8BkeuHOyXNfBP0&8=LD4&EC8hWTI)@K@l<_*?d>j$Sh8d}#u~3vAPq zw`VcXVH0b9&CNDt&~h)-&pCbA7ANtoFhb=`Qlq(p({918w7vOsizt&)UD|p}5x}|Q zC20r+&e>B^45T0u@q626swXda7p zV)z~2mss7WlinS%tEcbQLOPlnjG&B_)}bKP=_p)1&VDR6+M8|QGMH6c8~Y+&{HvLe z^6K&N6bvVk;1xj;|A3|P1^Ws^eMxe-Gh??DE$CyvpITJecB}UU#t{Un%ra1VXrK!| zy>ShU1dF_e)y;;MX=-4sDjixpJRV{bDIxxGd8FKZhTgmsF!<}Y2ktGcA2;n6?394q zp&Y>SXI&?P`MF;IB36_N2%|ujhgG79Xob$%Pmwh+e*UUPoW$f%tk0-~roapEL-i6*qs zj^j9FH^=g)X4zo*~C@*PiPqfy5n&rZ?^K&!?N&U_qMrCd%7HV9(crCgP0LHfE2{ z&P>!ys(|yXySK-TU>X6+E(YOB-hA#2<=B$&GC%oVZapXmmAeu8_``iWOyxdO?o?{w zUsg!^M{8ywTNH(uP_fzcj$h)+mTOxJRIYhy1Q(y6NQQ{X#7T7gkD~LAX8Uc!c&ti) zlxUUMt6FDTqc6}c0b9!CPG-pJ4b$J#4fLQ96v{vI6i zwnWUjesTMzbt9MM=15*%M&8`P+!l6EZR=EBBq7*~yAscW)jPL?wjSiYwQu%anVu>P zI{NK-$*gHVsVH#EC~d`c0>2?WL;d8 z=!>VO>sQL_`jyMLe@I#4tfpATuXgoYq8-&alp6j9|MU}viR1jIIbE7DE6XJ{>)O80 zUTvNfW1Ml4GOBt~59n0KX0FtY1W(g@1L~5AdGs_`X z$EdL?s9&7)@mzDNssYfd5z7_-tlpTk3twTng5*(7SkhD|_pEW3E&Kg?jzHpR#5oZX<}=;LfSH`uJcEXg}yUc&i~J`9mStrw!sdhu*3 z@XH~!)!Re_jW+N+5|Lz8{Z5QZC2;pdZ*)O4XKKog(~Fs9Kt>oNOU!@~vLvKS+`9Q4 z(6URW-H6I{Gpc*wFSx4gErx~k3W`qCAh-2o4=Z~`<+12Ljm?lk3GWgsZAwlbZr$ur zTs%2Z$(#UQ_GOnLU@R+f4W(2_#abqzds`ymEIL7{)bH~msnB2g$NkcuKSstvA)fy^ zu;@GjJECgU>5URAJf^RRz-JHJz;E2F9>wr))3E6Ifls2-yWTnHecSVu`E6a9R|8bT zxj~gV360t<=ACHrOBaza+^EL z6G@BUb0=xjUX@<+kx*N)4SyaqtOsy>hoJy?iwKV=R-+d6O5 z+G%Xwd=kgMN!)BFTOwukEHnL5d?S?mYtf+VeLEDDzOl(pf^AOJU7a^TB6q@d;--n3 zAU*#&b{VT=2*0L`3ZRr(aB_>> z0#5B!Y>7XA+uPiNDqk%A{wkgeFip(%#Z__+?_uvxt3D_vSy|r)zJ^~;ITui>H_8=t z;k0cbBs~0LibAoTc3hs`{4B-IM9Uz6^Xy+P)Ya}Y{<-gy>q{Co z1>K^9bj_$EJ%n$xej6BUnbE#ZWE0i{3N2zAO}LOo#>2OlC5gSRO<<;1#cHqRP0WLS zdnJ55jW}HhJdnRM%pVe<0(=hI6HWYV0wML5v~3KB?QmJ*r`!CGfBh;MKA)c3I&Nq7 zgWnQp=f{DIJ7i;$un`J{aFq)c1E0!j>X5dJ^L|2W2Y z`iD4wUhRC5;oRgFd0jscQQz~u?UigvZ8eH4gwDWVOq^{dnf2nqLMn8C@BQ`o*IDd` z=?FZ(#VxPjvjN%U>Kj^cbfSblEu341@Ib2*EPorMW0~sTkiLmM7139G+Lnf$OT@{sXr<^0d;F z>H}{u5=5dMm0A{OZG!mrj7TpvYfecU_tDxeu^c``4y4F))ME2^8JXSOe-fm_W2|Xt zs`Di6h~jt%XP(ASm_QS{{*>aT@m@qH#F9j{_=|(7&3_Ful&fZX#LqYq{#D1Xq+zCJr#Q;~5F^cDS!4l2FxRIlOna??m|E z{LNcwkdV8yH>|O9mLAgnz|ZXZg0cMZ(98MYo zF1YpPP1i!yNUcYmgij;|xd#vLeWD?LhNgi=!8DN~I<#sD9E3<&zj*MnlkIUkA8>wr zWtG5r=RsxjH}7?)-zhzva4BH^G5)v^L4(?R>#R)9cjIv~gpFw@}pYnyaTvcWG(Lt^%^Ibb9O2ZGmzd(q2W< zblSZE4J_ZPHET4Ct*T5kQ5(a{J-V*Nw0z65gh6j@m7qNIpOAZVj=7)T+Aie~eX~as zC9C&&3^i3N(KO7Wi!zIX-_a521sdgQ5rlQFYDQ`P#qaP zjEPRU>^qELyee(}b}R_vXb*&CM4yi8Kvzd2^(b4TMuEd3iQk~ghv`Yq%}NDlj1o=< z7~OfgbGaUI8F8$3$>VEB6}ja3qm7};tnyX@m4lDZ4fre{Q|Xw?m;#hl^6g;X22AI6 z`qkESN^}w&O(}~x;FSo1ivEubgo`8re0h8r@f2CqaOGa)6Z+pvSSTcE1Q5w>0ms%w zUw?HqQ4sZae)IVl3C)7Cu_O_jWEiuz|5WH=B5O#~n?HWZPWKAR=#jGdx9ilm?i~|V z9;iN7r9)+mf?2vk;RdPB;G&7z(EZ;(4Yx=KUoX~rS!2F9DBZoA6r(;Y$uC+`BLil{ zBAJ<(z;Gz*TuZ=5mmG2PkG$O6KnTWTc(@b>J9tI2aEQCF4VrBU*iu_KCi<#&_^oee zm;Gzmy`T2LcrH|sv45kLdzmU)MIitq~exUeI_hmR?Nmqi%KbwQ@E80}#& zPhX8mt&6KOZYvIFvcAq$sc`DyojYSB``bp%O`^>AZ{ugR_WknXXzl&o%9b2R zzDovCqxSIrXnC5N_~P-b>!Q~57M1=`w(AnWQWgIvFPR3oL*j0*6C{s%6hNr?(^L0Ua|hG*0$0MzolowxF2GOcJppcG(|xuioOVpHZ7 z)1Rm9{-rZwz8UW+(1L+}z`u>3fnMB5L}KA0>uLLftRu0E-2_5-z}*M9k5epZwZ2G_ zjZWQUQ=z1D7%&;9Tnu-d>Z=u|HhhOc`Q+rg7 z6W579FW!AK)r+YZuZ^vBL65XW#g&h`HW{o}7$ik^#!UP?)42OFZ^Ahy9QBFj6EX($ zMsg1=pL}%+yDuMA{1lxERbLCTGb_+72|MbUKfe?8!J`uDn#^>{s1cs+^a zy8MRk3tN@X6~LcnHmqP-qPl$nW1Q9^aWgSo}rC&r@AIHc>4eAQOA6)U4c%aX@tsT zSNZ7i@0aeCxB)EhsDI>wSt66dzh(Y82(AoOw9Hk8HzRZ{GvfkYIj$jh812fB((#uz%o6u|QI6&O+8Y4=bJe ziic{emi_%keh&=|ZT>X^axh*o978!~-Fjp@+0zAvSNVnn2F8Pa&N>@WtbbFd@?1& zyYLBt`u(89W$f~4fz<*T%N%@u{ETW5q-{$Top{oIZjEgxN>q$vc>~ZL{)l?GOpgtNTv$5&M?rVWw;)hUOAah8~G+= 
za4oOxJnv|kuPmCGFeBY@;ouxlDaHU%*?opn6ud>b={%epe}whDbvrs$G4nd)s_tS= z^KM)Iolrdvz2c2dmaXpw4I}MjB)qbesxy;;*eov73^6YnaZ<{SgZ^08g{f@Nu$Jt_ zKe>YY$jnmGxTQf^XVVoC=N~T{MhSbUp5M!Smq!;D?c~(fH$j%v2xs<{a&~w7ApG5v z_QtJQtpX*UFeaHAk^mYhDvK9Th+w?WjzYG6I4p(_iDhR3W zxHe-l6H}z(_*5+E&AQGsNf5nAoYNoHJpa;sI$w2`v(@E_+062|eD5ItkM%ush)TR z)Y6!dn)SIE!Xo&?3b*n?=8d6zwt<8)f)!3nCCEyn1K|+8R`NOZI-q>JUbfi<1*%l& zMLFqm611T!2wOukJB9vn+tB|WUlqDD6RZEeI=Y=K!kZnuIsP%*%;z#O`8cs~F2>#?!@x63ar_Ah2ghp{G-O7SuYEVYEl z`wdj@ExOaCjFYEN!{~L|ZASR; z9(qLA5>oP~S-aTX%7vl^T^+u|O35z4rVZN3DQjr*qy+!BJ|F_vj#1RkMnb*>TSC7S z&863+^CyQ7zPya8OEg8jsR@R^+)7dlK!GUpHL}YJI$FIIgG z8Te^1Bbdqo#kEdMXf3$)sH1=!G-AdnaeG`jF|R1zf)*lv5A{R`3g*Oo*Z-}YX(m@rSM^)&Gplu=;j^;^4YVR)`&~uPIg8`L$X5B?JSG2jp9@v z2edA*oZl!){yQX2HrVs&qa>DvBSKX3^0Ps@moNg5|hze+zS{J>sW_+jo{qnD@Wo$W<& z`_qBVd8-e#B0Ey0&P88WmwN7hfAZYH*DnwwSL^TZC0N23;EzdVO|L4iYH&E9U9PBD z9gOwa{Qc{bA1Zd8+1QAVCIwKD6xU5Ks|$K1+fXXGr^EH)mp9(^wC7;WSC#3<{=S+YLsNk+Id3|7~L#Y zSNrz>z7#~+;P+Z9+-TthaP(_MKo-@CP^2Th5O1lHoL+ z{uK)piUzGz-9WeaW;;5I(%8!jpvYcitO+N`i%r@r+s)_RH-_%!3Fk6)Y@Nj5wAod? zAb#{ybr3H*7*4JNMa!>ZZ}4WAd?kq$m5ibj3nkct254MaSYfH;HR8YLwv$lSr!Xp7 zG(`^-{PpDR4J8M_G35nf4sL*i!KNS53lM&Am}Zv9zj9cWEo;zavt&r+dkUigM2FN4 zvGl+!FO4Hkw>XM$QLW|EdfvpQMJtzeH`}{9@gfPu7?&J>=uFoOk&=1`6>`2lNXKuY zI^}Mzo(Y6Kk*d>^pdIfwIS-%08&7FzV3SX%L(dd)#t_If+<=Sa?T=$(=3`r_SYR+t;X7&XXlNM2AHVpqFhBo^ZHd4A^k3fH`M|x9b~60Dt6XBR ztNe1drK!PRhFffkgb*A2yYe#ZyvaG=AWcNPqStk5yfVCFtIDim7wj0Ntgf>1ZDz_& zO+`&jE#wz~0?mMa;_S|#)-ym!Phf49ii2R8 zMI)Xn_JW>JRuV$=sClvbMtJ6PM^;fd1;h6NQ>kjGtskD^syaCr7%-G$Q!@fmYWJ06 z^Et8vR~6N`)w-X5*xqO-Sb_82hxj=y8lc$%^swjeCj&qeM5@TdPNCmXhK`R-gdbd3vg%ntM@%v5q3p zS;IHrb&4{Cq|r!#!1r3ySV4fYvif>o(J6h(z(jA-$IERpz*0&%^go`R zm;)SScohxz_R}X{E@<-pgA-L+kGemGFZp0bMSw*j)7Bv;Nt1TcgRK&e^+W}VU2>YM zWd-z|$*97V&=)BVRT)=lw1$wV4~lF;#Z}MH4$+92>j_lL%^uN_YTYq_^FQqE`?1oY z8y_!5E@w*G+N@vB717E+n<0R%WXTxFQ2Pe90(ENGLeEO9FCT)M?#`=(?w22O&xW5$ zYVBL$&iHrLrC-xqX+u8@;pX=05EB@+nH$F6Kyomc#f%+RqOSH{=bfGl`gs~)>|SDe z()_QPg$uLEg#Wv|_kG>nC4m~hB0(57iV=oGM`ez{HF?|Y=j=td2gu)P8 zP)C7+p-*u5?s!rc(IZLwrln@?Q1zyL8tgSddiC*#3ArJ1;y6|EsfNH=9X;ir=7J|E z6k)DEz}966iVojd7SQ?mJ5+@?<#cIrXy%Yct9B9XrOiFW_CdDvE?Cj9h$pY?+Vd#y z7pd&+TXR9%BFVufR>MW81Q}kGHrBkU26$t1i+Kj0H)O8!Y3YS~zSO{;WyB8}oeZ~W zD@DU)?0T)ViOP!%1tV*RWKVQePvnc73@A7lQP)4v>`|->G&w5}I-Jq|l$N^ByHMr95Nw{?a3>opa?Bz3-U0(;6mAPWo`qp0}g*HN;*?5ifZ z)sTrKXBF~X?ReNLP&8=Mj>8rqn2=UmMfGlBD_~*i<;6-zHRb_0r2RXjfpQ9AOn^cV z3-BvKlL*>++PkPuVrZ~ipnnf*LOSSD%NGXRdX$E+3}BwtzMnB_TU-Z!WeB#a&S9vIN>ft0w7l5NG!QT40~^TXU+N7dWXJGBP%% zdI*E%EE+w^Mp#Z743E}Uh#3X4>gGGZQlsSyG=qGU4f+8ZbGJ-Hal$3DoRd!_Zv!3A zEMk$|{J-(n1GeVFR@d&>haZ&aWg6@!8#7bVwSN{$wSKo>5l3P3D`D-m4x^!d{$AsJ zRfCA;rnSMK(z`z;7{j#C7=>IlweG~=7}+j@st)x2 zLu=NIiRZ|W`o<2T%3B;=WNEcZ;*~Z#5beN^=h;I1_UC^u*N;=qrp)jJaLN-vXBH?L zCkrh4&Ww{qkCwWim7>Uxgj40vVto9#^)d5ym|3=h45uO3YZ`c8iRA_PZ=eEY+Ctk- z_2ti?7ibfR=jK)DT^N`)`f5VFBEdC=g*QoWZ>n|SF~DY~KDv(LSg*zzst-G31kp|R z9&wAW)Q=jPnhT327>J+$O211X`yqE<&Nhd+ygZ}GvbJsi-bxX-!j1>+dob1vmxGrl zK72>tTe21x&i=h5)rPH1v}{G}*gAqt2c%1V$g`W9^Eb@f$+3qWTS<4ftQb(#-Nmqh zW~SvsEL0N18S!mg;z^a7<0I6S9-9*qm(Eel%_+cA7LIJT{@ z%hOlsJkq>jCwuny>s~ihC)#Nw+i@W!x_|N9p&51T4=aeH5*uyqe=oKqeSTzr$q@0o z?^RKk8~ztBoEw($0BB8bn{UOn9&+~Q#<2aI?Z?cuq}<^$@lT}ot*|X9X|^e3fjOO= zAGYFe}n%(! 
zqO@VJMBfwXB&xU#iAgfP>2i3p;+M%s@Jfk)mp09Cwwzw{-iwJM)9O-utm4*Tfxgpx zw?4M3w)h*OQirn^AmSz2(v%dc_3+ECcYIra-M9YQs%mPsM;yhQmSPGep$;XKQ|-gX z*PZu%%?v*{OE_+WmkX}#LDkTnq;+3-;mG!s`Gm=@-^ZW&p-_$sz3dHQR zG&5aUNKDEk72-E-jK)>lq^w1dcXkq9PI-Z>I70zKdZW* zMfD6($!G$xiuuHMr>`&0km2dKLPC0yjeUWWE-$UNEi7L4gH2jsQdJ(Fpo-K!;w86w z2othE)7Nfs2mxO}SqWMEK(s7cR#;6l@^7)~TvV+!b!A1)&~raqHRl%ats=}`-zpS$ zlM<(6hOn%Zs%lURX|Y=rqIJn3c+F(=+1fKnr`3E-w#!R@2h9tAS}~v%~7A(c1o1gGf=i?($?Xf^sj4cYXrg- z<+28KRaNTF)3?>%$d6Hx&{eGbbZFykZBZ{K^a#OUyn5lLaf`Co48HAFD0cE?Zn95^ zS1R)r0rD&y11f!*c=Zn>FR?tJYn1ID-> zD@fc2w_@^6Px=kykf+@9*%{jfkzJF(uB5VjJtHBZf{@{m%9Yltinn$br1=X{@4l0& z8Vtv(^To@Cm4aI0M(^wC7ik}V4tEHN(Y#+Rwr97%Kx4N;=MRp44ujPyb~Btnzq9tL z8yAV_#1B2MTk>QCJOT$|{qYaFA4n&s3}-q>P;ZAp$8mmas;Tl+T}^d(cWa~}2nA#j z&Zvoe9Z&rpb`6j*dS`D8H^hP)?}}-p?L#vQTO{N;{MwvrX=S%|e*l())FGbNn^?(3 zM}6wtGpqC+=c~uL`4rQvSEGT?qq}y(f`J6y{iIXfik~JOjFp=2K)TMPs+Ms*Sg&J7``)cs5#lu z+A?X|+m8?4m)T&}3keAc%72U4kl0aQn64EOL)X+1Rrj_RwWVKI=!hZGZiywL9n2<% z#HAzd7@bf6j*sp84k0rkzLy&!uUT}kZ}!IYJ>&=?S9?CuEc)7ZN$rh%#buaK>Xeq2 z7D^v-*c64`+Nm3ZOTQ!$(w7;Kd4HbxqDMVsNf0$;n7zmZzbA&Dtng#n6{xW-%Kwg* z<ReG#RRR$aO2C6H&SI#!q#{XZ)=KSS>HtW?#fW?7{%A~Zztpy%5fnr;u z6joso5tla@eF`+112E(jr>8C->Ob)G-@3h4@hI)q{%0tu4Df%Y*5IVjJwaVug^bz$ zs;r`>vV^7B(K@KD=&arZqKcJBWci&je|!s__zM} z5;HFlW5}bi3NG`H-^v)ekB@yVCmOcfc1|L_JY)j^gm+6ns1}ut)TQbvXTK;H#G$pu zl2sS_IOZYv)s@s*i429k`t4{`;_o5r#;avqT-MXFTFE?1mX;ldG$CHx`B?5fi@)~G z``b%hdg{LqkkLyS=1gM}A*)km5}(aDIu$k#i!u6lobC8S7Yc~Zj%nkDbQnv|B)2>e zMZxKRnz{%n^ScY=%|0B$*@)ynWl(t)lmQLFO z#s*pkOAWj$bo&Dlr^y1s-9f0NDp^4nJGoaia*iVJN-?JUaw z;<%qXj%0;I^vL`~i@T_j*#Q?bx zKR`4;09{e>hwUYzN;VVJY$G*;x0S5(cx;_1Ea28)MYX7P!5+R}aL#-QZ5%fQs!j`JF&!Bwj5ZM#Cd^tfFsTpM z5gL-buew^5448?*8udh1g7Rkb6=1JH)Slpx>hR?dD2Nt9$3-kPQb_AXZudlPL zS~bidie`U3Wn?)y8>Y5vi?g;W@ zN6^|XFMlSxBDYP(oiCnnI@XKzl8*0QI%)nk zhX0oSZXuKI@Joic`RGLN#U}At&36>7_PZ(snhyG~WYxya!-Hnkuo5kc`u>A~IMn3v z`kh^~jaRXc4azYdB+4dk5Gc(V38&zb!cgUXWxB}bDsZoDiQ3@c-tmAc6X{V20NT~G zUaw@EK(HbWIFEF2iBAJdcDK=m{L+?8pftrNP+GkPFNW%2OW@FH`tUk zc>{E|etE{lcR7;cfKq{H$qSs+x&sNk%U&Q7BK9hafwFUmmx&p|wh=5q)F^7gSPK$?a!?1-58*_IZnejet0)M z$ja1Byw&8t+|)X{hd7`Q-?CRCUEuRWudIctf~#S$o4sGGLIOS2?YJz9?%ut7)mP>p zR#uL8%c(L0zd5kd)^RbJAH1A&*U5S|_ufNhJn|w*>yTZ!&t2%8zntp>#-A3@%o4VvHJ0^F-XIsG{#pN!+RAKzgTI0C?`|Q7KTcB zG?n}O^X>_)e5a3C@$Ou)S4&#UA@+f7GQr0g=JahzKT?WBRMZM?LmVN1bOq_pz{=+2 zpLUfP69Pk;g7}|ij+DN=#NHQRJ;C2lW_YM~s9TijKnBjZtd4)TLHKx0_oc#G$}7@m z@l*2@CSy=Q;#z1n`)P>UOv_uD&U5-WF0Pq1Z@=K+@V(U=TVc{0hwz0%y&@=>;Abnp z3>FYH=XY@S&Nd*&x2Eqygo{EHq+P(%BClRG!3jMXS=2luva+73Um(mJE7HmthlP`XGDgSouO82%rWu$G+L z$;%3cFK0h5VNdtVw8Few&t6}SF|P#8Un6u3ZmzM}PK(MTtbfrViYvrE5%(zna1|6i zk_@+N^b6po2k5;v`HyPt$rF+*g+PVuup9Waym=)xKO zUn(lh~jkyoKsG}J^ef>ousK^rJivgX=kW8=G`0LSnxu1y7@=`vUTk@T>%4L|rT>FNKFB6JgnE0Q%=!%T^3EQH1t0&(2yHsqmyS5G9o9Hi?<{#764T|>G_Vl3 z_s^j<+*dBQIssxO4d%`>YV(xIy%}-Fd}fjVmVHcACkwPHqfLYwV12BR=&Rl3?$&xA ze7^Ux0tCoaN}QdJ5GtWh_c#z(QPEtpXVEI`ChMg*4tApDJ>ZSMoh)Y-;qkKIA~fFX zn{>q~1)4lC<$Kl8@N4Q$LA-`5f+({MSji=IW4h<^A(pk94G0+rQT~uu;`6a#I>@ zbf~<#{B87v-`o-J^6V3xLYbrQ3|n5!HUC8=ULG5t8*pEh&;jU+)({ol2Spl54@J)- z1*&Rw+Adbx&gYUS;V-RC!T{ikp*4M!`CX0@3$?5vsVi;2o+%+S+RH0@eJLVlSPBmx z&)FYx$9`DHL2o#gR-1MEvxF6HI@j0+iBVTkYY$rfF!;7QKR=)3mv`%YZ_gAGl*tCk zSt?&h6kL*p8+4=)jikZ7Z>4M@Z}&X2`tjdJGsr=ZfeHzZL*KDrbchXWr9wi;+?>tF z$&Y`mEc?pii+=nlxf6Z7i}f-V3%Oi+4|?Vk(pX->zh4Oam^t{>skSlVgEfPaQsX~q z+pwc;fT^av)F*k`vDW^M^SrlXuqju+<&zZ!TT+s`vg&Q%;j*UA3ukBY*}kv8><067 z;gy}A&uZKCM6VT<#~7pagb+zS9x}gqczB{3+Koe{0ddo%n?$Yii%$}dg>&)H80CMc z-@dDZzOk|dj44Eand+-WWwBol=Y(oeiCc3&EE5410t{1oaJsZC42h&MMS{9>nstHj 
zfzWA=2rtXKOmAv{jz%J>hKW@-qT$b295Jt(hY&3l$X*c&&l!m)El1tuJXHI6ycBiN zc3)_OT23d52dYZ_^Ovv+NWRk>5^61hRrV%feiwW?A3{uzNp2U-L17&mepedv3INR zgK2aYu??BBoUfqZSCFrG_j*OGMhpSL%3`?-S(fl)X z?3R}?e_ToEtn~w%}5;6r z>t4(5r#mTdidfQV%d{x#qN3ROqi3f3OEs+x?L!WPU##D?>$uNdwVJpwv@vnE-8I+K z4{UD^P1Z$lkGcl&y#!^ zL-r5PI}0d%sA{fWI{nCcA1hHv`FOL?YCz!s`GiqhnGT;N3K0+~`ybc^nd(CuuSBwl zlNG>R*Fnt45L)PFRP|+K2_Q7C0m$wAKxx40bi!P7>#73xKdMDSQm(Q3&KKOIT^p#9 zec^O|rqPb~fQn^$-FU#HukEX|=4GsO!~vJ_?B@}MYd7|%$oag+_=VO}ng6OyG3x*MzyMFHTEVz(UYQdJh}cX65v4Nl&j<`@#WON8na*KR$OR=Y3e4g|5)9bV_?#uFuC0GQKc{xcr=i{&zA?SIx%sRn%ZNNSB?zQO%z9^g zxO==s%qfXIIsso5`>a4Z?4f5)whP^95*+4$uX#_T{9K>>$j7Hai+n01Yhjte`Gi$i zZwi))$NkB18Jrr0^L7(Ctj%whC_FsD%2O;%=ENVFU5hnG+Ia}9vF&rXLUi>q*l~PsOR^cv}05R zzJPD&jyT8^V(e^eS`SFp0GVt9)yMCAsu#8@@he!<1gdvM_xD3I1W-PuGuJ2uFlMcN zgOb6JiP$@CrNo6A^~`lIY#o-3tzBGPcsF5ST(n(#!&;YG9p}@0A-lE^7kY;zM{GdE z=$vTLUM{sV+%T@xz2p~G!1)ipfz7h)T;xTAcU~Ju=V2I5B63eX#V0}5BS%kEIIB;^ z8E78Fc16>_xO#m=NNW=xxT+H`lbAns2yfl&w93`wUqf4VbrI?4*QsTWvpB1X&K1@||rbPI+s0hAtyJ zqGYE#LVFo>1l1*BLc+cA{DBuAoL9+rj$LI7h*#r}B`DKz`uh5u%MvN;$bP!T+vSq5 z|E#^CCjGf?gv|R{N-X*^w5r-Z%yVn#f?s3|0d*>}3q?d+gNqEN9esac-VjTAYDyLm zqmIas!c&#&U*#vGdx+p7>#r&cKWyX)BJe7@VK~4cX-RWT&xehxD2aqfRYRivlRQ*n z8ydF%eLU|WYi);V>O!I%zSTF}Q2+0~OLPD?h96Zf35H|62swh%xRIiz?^bh4N!x+> z&JYOX;cQ};3cLyf3CvgG1U=IUZ2wrl+D$`;c_zKhIc4|lIZ2h+w)WQs_?NzGCLp;6 z9&u)T@q27I^cujf<6wI+P}6R~e(M?~hw37a{thZ`JP~v)V(ajtrrCvpcgPa=)R(cc znNJ2}yZ~E~t55DmO`6as(}Jli;8XzkPb^(000`noH7M~I#6Wd`l9kg@J!n_Td{fkQ zxFLN)EkW+pOzC^t^W4ibdW6OUi8bB>%U%R^QcTG9DTXX)2^v|P%a|?W7 znYqT=LJpPzgFG!WGY}sdgoq+jK!L2S$%AhyR{%U$6R$t3S}$D(#%zVuy3|KizQv2w zKu8o_*RW)I%(SVdt*t#1(VM0_vT*cP>tZtesIOST-P&}Z-Y@IPdZ&d0mZg$cQ#JVK z^q4!-j9Q4N7B9dOgQp=tHlhUkeeheY$sDi^zH4;@rSU4H z&BFSA6aX`CC-_TMie8U3zz%|((UTi!zys3@t2p4; zF*685q-;elo4^C?$mkPl7*q~YQ>WK8?S#2SMdjR(<}N8>$!Ah)3acV9|4mz*6RCxm6b6@a?htcDHPvy|R zvwEN1VRsdOVqdzs$#k_mWd%sQ9pncJyHd;y0LaG438fD+cfYr3(yOzw!Zqi)GPi## z5cQozctz27+nU;wWz8zC^*q5J1TYw(QbJ~tHot^w+c#$2wIC!%Nwl#(kT;@lkH*f_T0 z`oq|1%##HuRZ%-VKkrejsuBX$%};w}0+ETVNqM;j0R5pQd{g;C5n?PVIU>&L93|Bg zygI8~adj0#d?7Xx77TTdZ277Um;GweR9On#6*&%h>}blJiT?Wt9}sH;XYIQVQanv z{_vitM+P0etvKOOgUsFi*@ep}8$?~@;*`X)|4cdJU}64z)IC}pP|jvb1PbNL)mFg3 zgn@fzn51zmjgH9cXU2i6;=-qM@SdzI=94%1@7m>J^YVl9C!3@b3!gyZw9Aw5{lZ=* zPjAY-Q0FvsmUqYaB+LAHbEEx9Vtx7anwwMx0V8ELb=SWAl9ci_@vC2^IHQOY$UduAoBJu;#svUf)2 z&E7Lq;!3iz%gp7HaqTU)kSpZMytr|3afxeRe&_r98_qfJ^LoFY&*$T5ILa0TSb)!N z(^DH&%~)m%=!<`cnbB!#(0|Uj_VZuyz6uiv^LSJb|7@ahzOWrJ+XUNFIgE1fbnem7 zWWHL1V(fG=LqM-&fgt>@wrpUHguv&l1o*wI03okT!piljL)UrRh1uQ9-NDnxyQNQj z35!nltQ7k`h&@C+jEs zm=M;FvFO6@y=#N+PvnELZk9wBrHe^<8HLVWH?QhMB1_UI27N!xZWaGj=@sd9Eu??k zq@Vz5jlH_%&3Xj6-^~`D|EQ275@Xx$!me>65}A$$W+ry`N%2BWWNn?_nTAP}nS*;}-`!Ad%Vavq%y`=-D;? zgsfp#dk9kFh?NTsvi3^w78c7kq4tN@6@N)^L_-8$vM{jnf13Xqg7VbjQAQ>WsfIa6 zghT|$bT4YU7YxppYrhu&);#ma-p4?_<$$ds4ScrE%(Wqq5dtKT@1cXp=5QI>-7Gvcb5|z1|Cc#%G%F}i%Vsb zu~!THcx`&x$~Z%FF#w}iVjOa#+1gbrF}k}JWsfw!C0@b#Byj!v3pVqqdu1&Mg!GPm z#1?AIy|J^i(-s2|QJJ+oahOZC@{uFW%6uM__DJwlY`-MTJ;_XboXLEQpu7tb?UVjClUQIFh>Tx(PmnHt2QO! z`$FheucNf@A9iV-97&wzZSOA1NW`?0)pM(BZ-8-CJv{L1P=K#XM^~@nXnmYaVc9^H zRClgx4v!$xRJg(o2xNqi<^iYe;}7R!gS%tXVa(e4D!!P4f&zMypXm**v@6oC$iNC= zwth_k@PZ}JU5?T!u{(Lap$%s8S5D_ri@#2BFn}1-4a8t`#{<={pjkvy=8~f$vWyj! 
zhCo2!ebORB9~>oY2HAxn%5EZ>%CoyOGmA@8so_zx=668SeZ)qp`P(hM7I9PeKHu}a;%iiQY<2Gr-cvhzXpKkV%#L*M2# z6-Z?xGc+M`Fdg>VJae!}71~m{V3b(C%thbp6Ln4u5+%)Dtn5lt$2M;pR!r<)$L-a1 zlkw`uHHWIlpHnH=qH~1{lKO@4Mf)+|<%I6xGV1*!KxTDox45MRW)Ab-_sf!b4mY>v zPf(J%9|8_ul-5ONbnVPE{TJT@ooc+}I!7e}gOs2lUbo7$MMuDCCSlyCIiu%qP^jI= z;BQdTq|Lpe(>|scoCBXFs2HgU?-|Rf5tMuVJNE+Fy{8^c$@6&uBh9tO_&1rj+97Le zS3HmZw%ea=WOpB3Ejm6pFmI`AQd3s$iU~bmV(REtSeTn$INK5Ksudew`VtfBtA6F% z%#3a!=90Q*BEttuQJs%$+A_C?6yG!!R{OY5U)fd+^iBTYeJ$xkbKk(hsLimk^BaD@7SQf9K104WZI&eTo!#zuydrNaKLzU zZBP1NhOs(%pa=lXo!kwYbZPBlfF9h{`%@fwTlWb5kzPugpje95G#-Z(x-A-F@7Y87Spx8OtIJ(tj4f z<;~)E9fSQASf{b8r9cY!OBRfacHMK`&*1Is^o)02@sD9B!w|wd59_J8og&ir!0QP@ z#C6T@@mKS457or`e>FU&9~Kw?U@qx|mO@Z4U`?W8JcVLjaMbvRkei`3w)|U0rANhE zM&;bY3cu8oC@tpNF3%-v#H>~N0sN9+O!s6ei0h=_7L)T8$97}k-rrWfj8<1MFa2(3 zQu%6AY5l+_@_F}2Gq9FS?|0ymlT`krUb;G}WMMYxnYj{H$ksHgSP=%RzMtrhmzIH5 ziLf#9mLP*u@2O)f39-`u=5I95>V=4>VB6AXf>|1BX8T)OT0XM-#>oPWCv7!JkFR+k zJu*9jw~}~h0TF08Kd%khp2Ds5?eG$Bhn%5B)lX{zSHfq2gN$y$dpp9rt5`Rq8OltB zc{YXpfCB7U+Lu7Ev@3`hE%h8&8$>eGglx~{j1u(do&rdrcVKD{_@se!t|S`>gazxe zI~QjsoGv+`00UZ}poo!l*&W>$4-Nq(pXlJ*QBBboHDbM1<5(}uYp`m2m7_uK69Q?)TA72S% zi?lWJ+%1)nM~!ke0segKAAGvi4!j|-D!8#!^jgkr^Mb6w==1fl-Db6uF*Xq<8S;>U z>=idcMgFyEQ>)L*3!(M$fSFP&(JISB!=5!}JGmCuBJ&4_bGRY?bB*W_v$tMRIN#og zM42IO)olH^IqtnAv1_S5LYAgydR*V@D7Wy(dLcsFGKTfP*ie)H4yT*mTvPn}#5kw#Awaj;GR z-A;OAG~a=396FY~v+DPk?o!R5hNmw$fpEisOiK~irXTeRyDv(WP@0iE=`Y76nU!po z#YlI*jOL2w`BW$)p zi=gbU1t9xWi(3(Fc<%1gwV2l9vkBK^b&7cR0o%nK4l_Iybo`=cyTHPBDmDaj4Gv-s z)ZmD%Ax~ve;B||!*AS?6S~)aT?;lA2Wee+PaH=O0O|kFCb7h5SNCz1pMyllujuIAH z!sb;{#OPGl$*VDU)6B{MNhuBxRLNdYAG%rl=B0wYSLc@t6fV-xAgsibH{)M9=&q4dl!W}I#5YZeeBSByxFe4zkGTztxhRY z`?ti_9CGm7ts*_86ME$xx2mn=b52f9M)x)`p*)*;@CkA@1Xu&NStG+cA)xB}jK1SU z=e}^tGh^1;PCqZM?8*4V#*qdr&3nsl!pg1JO1JKAaGh<(VJ9hS zo!EVlDODTiNdDbkNWFu^S2bGCTH63EaMq86D7U{7CVmDvK?sd97)686)gaI^R86^s z?aIwp^dP?cf})}j%m={8Z3YwXaIbG&k?u=$Lfd#0wjBR-uTxS}QU+Asxj$%#a#dt+ zR4+gn_HSJye^`m+E!#W?%7Rz(EyA7w-=YHm3#Lg;>bu#GAEQg~0SD4K!`eb}dn!&?Lm%H2Flg&7u`1|{xr7^`U9_@_v_V&^( zTx`eF9oE=S_b(3dU5$r6?kg8dAo)}qN~49pSLDkk+#)n0uJ{5bT=CJn$f1;xL5gy7 zf9y7C8=zzmU5#yL<~jd+J-m{C?V}V%9;??SRo}QFEYn|FIYiBZ6u7+!XJYv9=AYhrVeqDF` z6fokyZ7gP9#R{r7y+#Yat?}D^7U?$6^~ZXaJzGfzT*k^DSGVPDh>epOa)fjFig9Tr zfS3Yv%j57%x7iO)QV=OJ8FzJc%*=vdGOuqtnXDHFKKllKhOCDB!DSvj(6c$)_D@#E z_A1}TZVRu+Sr&wrKGjpj_IB;{La2y9Eawunmgr?-pEM8{oksMXenI99y68doUSaqB)5LyoSHipP1m=^P)HIIC>%7G$@8 zYqdbJXxkkE!2?ERn$P!c+Zc)l-Mj~TYBu?cG!+DEEXm+_`cSk0#Xz;3@914{i3Vc4 zZ0w257_xqo(F?fBC5y^F4d(q!4M)kEU3H+$#63CsrYE&UB1x=P7}|K;a(Y-_|A+R= z9daZi7t3FBowS86o9zF}ek(W&VQwAW-8|%QThT|~ysdO(f&?-!nXtRlZ5+DxV}GmADj{1N1oaE?iB|)uz|!z} zz{NPgADHUic_k%T?<8j>RhGdv(F6nFEJNrTF;y$vBurD~dEix*wkK;f;27d*a27{n}&>hIg@RqFZJI3CDP=Qcku&^Sdp?yJqH#~ybT z5gzjT&OOYk>bG!EM|y^>oc6wNc|XUZ{$QC<6dqz%^Vld`x!q{XA?#uF9ucc%j@L1E zf%9gFIe$bMPHt{(-O#ab9zUd%!$h6U?4DwzdFQTU6+sXN+T?+HMBZ5FCY`3ei@vO7 zIeVW=m*s=4*A(p`+WhvjH};WahchM5O#5oS}Yyux5tN=A^`S;$Fo>I*GDi1Wt`!w@|-03mAQ{ z)gj_*q6#gPBoMlo^N8uV$n_KjA6!W^)2_&!-!mB33G>CD*oDcZ&`XMXAJ?4IZm#&Z zCV*<}JH=49;6mmPLGCtIS*~_d6YGHiUIllXnnNlLCQ5k&F%e14Koz^ zwlrqNOn!|YEKm*sYy|1{rWu;7b@dYR@qS(Q-BF}81Ta3xF4fj+*)*U!M?N;&mg*SObbYmc~Y&UbwpB zbiQ-5Pkw{Eg~`=Sn`!r!Hogo$+fK z+7dwx3+`sSGde1WS1)gzsD|+69;|gG>icErl&3hVH84zSxZYlU^Ti_|YNb^;X#SG$ z)YTS(j+HFt0e`rF9`eJ||!*tv@gC z>UNhz@EI7u>tx%qVDm%Ap8A(-iEY=*f-dsFgg+cw%)H+?N>K8P(m^y&oeKz;O?`** zP2b-z>*hI)j#Yh8dd>Few|!?Ci9~Xk!By^=Sz(F9^yGEfodhhki2Kcvp%3?7Uqx7h z{=!*4vkf&#CTGcZv_`3J!#aXU`(wNRHYRtG%EI=IkNJk9Nd2Q7QZ{!Kg}mCc9l}>8 zd)==kM4QRERVnfc{OfFRRp#1%&m+b5O8w9I_>=?XFX@cpgA2!(W!vZC21&OJdALUa 
zrAb8p8~XgZ`rckq6z&w)+1?(}Wm$JyER*A!i4`5Ad~@*Q3OAc)frXjfyEDC{vs*<1 zzb8z6)w}jiX0vPNA6&f*8viAo^F+H(pl;^H7GwEdU~Tt*j4WEJMR$cju~$=NU!=fh z%)v|5GFT{0%yw$zMhK;64?mb`$9-R#CbmBl+c#-Q+25X2AUTu{H%K7gWrvg}&+S!a zTydUjy2mVyhQ^v3NbP_=uZRH!(X)i0LMF71S(YZ`$_@fyV&Gu=sLaILt1L0YQ{G{7 zYx{Ax|p_RK>cs?cFy@+UxjKQ5X#RkzcFCVXFpv5p)199j5#=1>hIqgK{nla zgC9x9mzZsJ9gJAG*cj68l8!soHvzO!q-NbMCR(KIR8NH5@6z~=+4*8Qv(~Y% zF!bcaB6@3z+UN=BjWM$xC3^JPw@$}s88XctJL$jId)2Lx!;i%b$3R#G%kajJ zpB9joyqV#I1~h-$g@+(VrGlCP%EG4@|KM^t0gC->Q_0BI@j#r(2D2|t9PvSHP=z=0 zC3Cnh3Fb25BKx*>%RrP^%=<^S?_c0RD^Nmp4){OS4wh26xVcZdj99#D3_(_+uB?}J z^$EYExdY$N#pC1aB}PY$avY~>>6 zq19R_2Tollf4-|pCb(OhH5>B7s{Sr46cUoB&zwR+ECx@|BNa-|s_y^d+|O>vB)pO8 zIQqN0yDK|_7+?T|?3^o0Z#rKB5z4y|+QaR_{S&#QxQ689CC4XOu20Q|`DUl*jVv0F z^PLNyzTYo;<{up13t1;S@wpqb<^MvV2ECQ$XJ+6kFW`veEUW6k2FssVc?A*SnU6pM zsl&Z=wa^3gGq3TBXU{k47Yv@9syUCI4`04v|HV+WEX_owkIK|eA7aoC$&t;E3T{O#oi;^$zZLkNh1i*zJ#;S1%l9R2J?QdKJAs;OmF`C)&qQdh#q zvCvNpT*aD@*M8$xog#X`oyaOHq|qgTSvPerb6)`0*AuH`Zgt4dq6MUNhwGIKLaw&fwcWjy@*NM* z<*s2vo?-Mq1VU=If6pJl77-wqrfFKh1$~R z-=(4ylc

(MAhff8)+Z4Bj#o03C7PNM$Hky~Nj+nBl(Y%Z6dkV$0o%FH|-# zFn5$YX%!m5n>{B1o*myC`>m;#YTa1u=Kh2*>uHN)A_1ri;hV4YLT*z_V)%R^4r z!8c(ZEh&W-nkRe8OrJ~mdW6Y8Q3==C89TxdT*9Ja`<2RyqKT^$m-r4|!(20n)n(U8 z>_&@DrH)*WSXePECszCj*W-bi4ZZZd{P$|5a5c>_uI?tu1NX+6$LC$~Puu%$=N9dL z#40U_l#*k2*xniuV<#OzlaK$#WJe2aY}5H%KHNKN}?#op)U?)Y2$mpe}vBxW0p z9<2NRokmPC5gq!7)GZr85k0Rl^}+Q3VX%8q0PpxWTc%8k489Z`MuzpP%$2F>;q?JAj81+jjs84DJ=lKFlaz|bvz*g43mZM@z*DGw&B zn_924f9I*?CBXtHVi=qrYm*NtIS@$8ANd8TONgnc-0&m&cx0h z^QdZ;E9dMWL|L(a4aS3yXJxu^aybMuJJK~Y*a>$azB=E3BR)~Aqpcmq=Oy5mmxQV) z$4Oz#jw=`bn++WYtI#%-iJh~UQIHZUH&r!H(1EP7&WhMccL+e}PUU{I9WU1$%gYF{ z(d6U{)SkORidbYyy}o0|>o1%H7HT-(HkN@B6V7;}W-%Xjl$rvu+sB!_3c;pt$@@tE zKVx9&sAciuD$r;O4???Pm-BByBhu<;*M^DK#O?;Ez&a$P!mpWNa7`o~j))yQn~2D9 z8?OxrlURenM!!A%ODBB+4X;$=Im*VM<&=aEgI*|ZrBgH~c}<9x1*KPcoE#G)MMqBn zJ2t?sq@uzVj)9_y%Q3i^H}A-|Ep2`}*$n(v#HGR$E+9cV+JlULt|qA!ku9}Nzjggi zvlpDyRdbUcyl5YLhRzsGZP@3Qb{P{d2eu6j=E{rWBM}4_xQtM=nt_4? z)q~VI&iB5LmiU8CR6$g#tL$%TUsIaFb0$d_)8+$a3k5|X?&WmPE;~~}C?p1_0LRaX znrgTJQ^44I>UTa@?U?tL)hjs@ebE$qX*jiFIv1@TK>n|haKUkFc(G2Pskw8m;-s1_ zs7m4f&z(tIMbq#vFC_oZAtAzh?lhs8=ExlaL?$o^q4lSM^5UpI~O!i zYP2=>fkg|^<12rkAF<8ypO{4&YjVDANApw6K8D=jNvk#Pvy!v=6*J&VjU>=?ec|)V zjXrT7ag=5`$Xth)D)x7Gs}N%7)IkjZ>yd^u3?6Wr464cI}>CW1ytl}JV@Wp*7w$o!$S^1# zH=dj*3gLL#68EK=K?ILoOgpMbJ~}#~AJ*Hi3U<&HB5?JmhkM6{uk0m{Lx2l7fKG`Q zH0fGxs>a&D`%r0{GXeUzAUI4U%cv!60|5rCiGgD9g#OQ;dBMknH6K5IEEQ^uN^QAeaZ(qG(ys{5G{P|HDj`XQ zq|W7s6rDcK4EmA4z+I7OU4nwCWJsz%8h`w~AiITC5(|<5)$bV@+q4KCLP%JP}c3%iQAo@LHU7&c%LLq5;xL)vv;FCwA zgm;r}%-h#-9Lk`hQFe2)uU^i6@A5>r_Wsl zFV3M%?W*jHo^)lpHp*Ot-h0FZgaL(A&y=aXQ#)T0d=)w^;Bk3Lr1)o|ZD+7e`C91@ z(bJmGPCh?gh)&-{mx%6E2O(+2A8sUK2tX~(Y}Z`7UQ{OdEi>`^6JvY^A_}4u2m9p? zUV}4o^LLjWJl&3_>#c(?o~jL18|IGdRpG1-6>zpz5AH>rId*L#CW9vdRy2I=`@GVti$0j4Xypv34?CXYF(vU_A~NCX+@oqGWCf_#m^_& z32wR3fI(wD%k+D*k)t&0VfN+iHRZ{(&l18X%M#~Lem^)^UewP5YU$?z_F0Ty)6Z8t zjl)zx>Yv9|otBt5fhuSwLBVr>@mqz8*tjTuii1s<8?m{{;vF_AwHFFvzixH|#i5S6 z`c9T^0Qcj6S?$C;xZ}B-DgX9&^|azJGP1Jm!GUKB-(&+X4<11IOR8T4-!FfXe}n=i z$D~zPiNz4957I{ABOdYqeh?F5p03sBHkqqr#YA1Oo+n8-%7ZTte}15<2aoH$E!~FJEr$B_iKXp$J;Gs zoz?>?g({pyelnl!tu{83M=|tW4SZRmh+1K#B@rIZs;kpePoM7$RIVF*&y}VOQ7aOa zlGo-w`E*Z;N6&_$|F$Ruar^R3Mzy`SHmM*KY^4CHn->sPn^3tyTi<#5wKRvw1j%

K|Rq@3BxC#J^2J?b+VJK8jMJb zutf6=he*qCM#=$OvMt##6mt;gjX{NWunhlqrx#@6`#dzzI6|3VI{i*JZw@@QP%OGb zIVbhIJ@Bitxg{ywp`i4y+)LzZtVV7vUwn=v)3o;n@7|c(N3x~Ev}(+S$HIQW?Sr>( z?F1iKFaA@td#w0_)xOLtx%42Qf9R2~4#a=6wVg5Rg2$hua^i=XhCq(+YJp+dKpvAv3dnLPXEqPrvL)8{QnEQGOp zQUtej2p?ReO#IU%CC!n)w5q{aQ;rTh{nIz`cSL#Fw8M+?(lWEqVq~cmpXS&*6 z)Orf|+nDsJ8(ar17KO)ZbxGVE8}n48wReArWMfda95xwFHgP)7aK!3SXjf zWeHMLBuNerA(ygMK`4$T@m6gGtS;M6(i(g`(|&aYSTSx7D3@+#O>YcRrCxS5jT|nu z%WBZ%zU<2OxVstz(l!^yYO6pMZU*Hlf(tKCk0ce7Z4N8zh+sIo)b4}eYN@Hl#t0df z7>W>GXeh=}bZxy9hR4uKM0S)KM)lFA6xBRsst+e%e>0O&z%}detzW%LZKJCzCA^wq zVInL$+k?Kw1CW+uwpr1mC)b19I#_Vmmkr6Rs2n9vXlZlT@%! zyKA$NQ>6Iut#y#CCIe7)BD@Z{n)8jmbxQZdKOF-DUMQF6S>W4`iW;-#HdC|AEq{#Z zfZPI3|5>fGRU&rBXX*Z5Ma|M$OW@T(j5y^*m*~gMsfo=AQJfw=|E4@t1}Mgy{h=tn z+d=!|5t}X(W0WC-MN;C9{|IyNb+xjh;(47gWiPF!ZDhM<2Zfq5`_t55B(do!QRmC{ zw;_RHs)VE8S`;5F*?pHO32?h@4BU9It9ySx9C3Kqp1g7X&3E^che$%}UDM^+W2)zD zMSj&6^=5qVTt7hu&u}V~+kUfO{)c~fIzW@+tU6T~)e|3sg4}Z`G6W2IP4f!le;M;i zoa)@S^U3M1p?8g^xB+o!f277?rA@H7MNt81D7YU}glmKRXVC_FQkj&SyHjds|F~f8 z)m3nm+2`otxA_|0-CU=_nv=G^?d}0{KJ(>)F0fz0wdQ0&Pr$Xaov8{u?oE7(W_Qmc zMA>1PhERKSd0e}NQa0t45`teNJ@z0NP*GmNNGu>yGu$y;N9=t|NCRtwdZ>7`#zs@4l)`apTk3s z1K@jWqOa?9{4{Log?=Zbk%peG1Rr*GF^A?XLM@^-zMY?m>eK0vLrWxMx2nsx?2LL9 zM1&Raysg+#4&-=kjJL0xsUqJn&<(qB2u$Y8whx#{8jIuR66 zJ3H7H-yuH*)F)1S)z&AFXf8lw+n2 zISRMP9Tf}C#}6cJwpv>PGH0Z>GF2ZWM{oN|Q)!!T>8Rimce{xaT#_DtGbr7IKa!WX z4C8dCNl%x?-%b8zJSh%AhF%!!D$0u;Hkmw5MX=jZe&-NiJC?64`{!c(5Lg;}`PkUY zr2VmaX3Izb^rtw3kfa`ER^X97in^?kx3j zWs^*Kxn0)RiWw;x|3$&HK>}Uc7u=vl#N}nn3D6m4UaiuP?sRN6cII|s9I03JJjWa3 zpZY!Sb5p##HXp?X$eXGKu=)SxI(rqmM&81X85TUxIL_UB`75jIjJD@y?W`$dqIiVt zj{g351bacS!wR1NK*IF6UgpS~(r?6NZgKen>LzJF%B@7fKOJQS@Obs{rKoo)@vGQ$ zIyR7`&jY$@uJ`hB3g2L`CV%}nISB)TaQoXZS6dGt6hWAz^DtaZ!NGL~{~P*|!51HI zMQi+mPU>_)xOOS$$jrVZR6z?);I05kYV;<2RAH5<8KIf}kh|D?6L6 zXpj1l`RS|wVV5LZ@B_xOP#~PMF!e88j;giG_4w6`8aTP z(*SGO);@0I_TA^o%0V9g_0pt1_r6Spn6qzIe}!-n((9;k6XL&plgM|u|D0$6@F>d^ z{%|{q&n5L!R7CZ)f&Q_-fCKkB4@X*g74y=+Djc7;{XgzqJme~O`hK6~zF4mP{%+@O zlWFQz4S%hy^n8y1XwYuJeX;FSu*!MBFIo^bW~I`%b|SKO`neEk$e=v%sng{T=MT#w z&c&_)g(~9+0wKlYq)NLnIKVh!EL<(i27n4L5maPn>U|}E7fMG=VVOmuphi95G9^zf zuk15%GJ5~-(5K^e6E`>+33*C0QlLeNtz-6@Q%wnkVv>J(k`JY;&cZ%|*7~0h$BDJe ziNmv~m4J*v2Y~?QHjl%BHlTNg0-`Nw%x#<@-#T~d{jl2o^MJ0VhV1fh6N>u zXM7ICs=pMDOI|CxIa&&-6Hkm13A|Zoz!}K{E=PvUJAxkumg1+0mdOJVh{E!`f)TEo z2Xz01wLExDDw1UCHQkWFT@7w_6;uU$jdwO0enMfG*~?GqS;oJkQe5_I!7o=_d-TpH zVCVN?kIOy4{U7PE*UAzDlZ#2>R$0+p56b+3(Mk`Vf6yRkxcEV zXg#I!WEy%4IiRMfx?FH=eFy?C8_OLQ_v?D1$SsGu1Woxi2SR~qcA?CBBy(~rZWr!s z1maJY|NIH458x zLRf7_8wifxe$(VzYn9mJs{UBoTPIu8Ma>ZvN}wo@f3AZ2&xxLqXHB$7T}5V;`mcx) z2!3$lE=0b^1`JDS!yaPcwoWu~g%}4tMT)+iEccN6J={2zdw+Elavybs?ZhbvxE#CK z`xPA6s&jmJa(o-Lg(I#gJN|9V%_LR@|J%8hNN`_lmF$L-#q>WLElfG_2>DDw4bzj% z)^LKLFsa3fEJapl^x9(OG~|TQh(YX{nbD@PDlX5}B#CRf@Y^xXY*v3%U3mo0jG5PPlIo&ag1OG@W*}p4MT7qJ>4l5hVtST)q7? 
zh4_FH-*f%g&c=Y5%i<`hA=Yr5EMih;gt*Z=TH;?HSi(9IU-(RF)OK9sPE z&#h5no+jWI4cjv1;7LsJ?C6v%iA#%bPkj4))%@La57pB~@yIO6d6_pP5bS6H>|N0k z?E~bn*pcIZFw!AdQQi=EOf7po6;BII17as5bSZH_ayp^wbNIJ0wc#jRG{5!liBLQ) z0{jegD1c26A1v?_l%G~;^l8)Q>UeUD6+{r`>n#C5&Uo?S`^8(-inZ2}#*d-K)~1I_ zEZI3FXGlqvm4~SS=jBmy^e)7@xq&QAh9>?rd24o-3e7Y$;6eSkwWOjGfgL}i)8Cx5 zgkn6J!X5^q_DYr(LO!m#DnSRnhO!>A7XKFbYUe6jTBMM@vWX1(65y_Bs}FECU3yps zJ&GDbtW>ri5a-w;~3DW0`3&o|uuv({c>3G%C#V4^P8PgI_I)oCaQ)2%PAK@0$ zyVLutkjtlca`)F3L8q}6ag^@ue}{AbB`)3%b{O+6$^@Tm%yisc%*nOh?@T+Wr2wT!=pkA6IAyS9p(j0n&QNZI(mSCARaeYE>Glx zGQKXn93Qz~_)dOjAnxB>chu0syVYkh~0C1229vDR4%9AUQZcTGOG8^7StuhStNDOxQvb?vr(EFez z+succb$1OC+4)>~IMaf#8?H8O7Qr_{FjE1Sg81i#Q@lP3>6XhOxcl`ojLC0$nEvQ7 zrs|h$N>J?uCUq}+JW$iW99|D?(%AyF{xXWih64e+*I5dp=k@HQLqou2x_`=Hyq1RNeEuw+grm`+Is|I!T2O!(T=FwvLNk3a|N6?9$LsoGu95aGPeA>=emf zpKV)ga4zI@KxcP=VnCc_Q&!xlo2A+*^W`@_8N_0-?b6)g%gG)v<2JcfT2f+UiaZ}t zGMzRknUxiUVFU1`+BzuZWqDa1AX$-_nfca4955x@w?}m2$YhYw!!k%FJ(UBWB7hL7 zf^#hBB*_Jy-aN(KfFF0<{jaRDuA~w*oJ%%*L zGz!h?@+~r2{pO#CHCGF#iF^Lm(_YdxGfgKEFVKjB_DYPVP3&B~o%Zwdmt|sMIqGv9 z`+=DT?f8nLo#=~pT3P>SE`^+~tWDf{HcK%kP+nF|NQwTtE7ly$+?sfaYbblmcd|24 z+&Q318d_>l5|f`b-IYLvM?er3e^OnhG@q|-2>OTyfc~a|rBaG}97BQJ;&f!&Pl!-J z2f~?30o3}tdOFLGr)JgGWE8r6KRQ!?oXp_rCurZ1j=rn@5?sch9mv*pNjza*a7MX5 zWtaQ;_CGn6-t&s6KI|goS6_{L`|{&taWg2~doMEVp2LB+i&K9KB?dSH5FPSl2d1=* zmlpH1G0e1n)>VON@LCP2k;4k-qO+9h4h2!lHusLQV;8=i2t9q~rGgcH{w z@<}>Xd~xrXi&>bpY50n^GkDk)EZO%q54KXxUZ?HbDf63Jucs*4izp<+tkI&qO0AVr zmbN}Am$51c0MCXKw|H+ZACWS{H-`bq!nZhz>g+6K^_4S6WW|+@5<#MQ`h(VGHZQ6| zUh+l8xJ|eo;%xapW75!m(GJF5C`gDjo$3cjqF0&P>FWeO;y*hv_g8x32pT>Bl0L>k zU!VX(dmi6~^U2G{SfS=iTJ_0GAgSp02&jQ*F}=Y;eT^vbL>uEiNv9|Wod;a^iMQf% z`YZjk&${#qjHz}s3Z$OH-Z_tT5xe}cQqsMjQza>})s@cl^4xZ@2|rJKO;ZD#OqmZY zBo)VNo**AiEOybFOUj*ez@g%7zn`py-kQeEzIP16Ye`idH3Q!cv!x@LxZWsEtHu_#)UEKcn zC|)r5{`N-hYQo~~3`^M!0`uLE1xA*OV=zhje)IT^2^uuvMxladcKoM&v{nTl8 zj?drg;AWc@iBz2;YJCKvKaPALF8L7&z8k6Ue2 z#xv4YL;`P=y+&})@y#IYGSU6$ka=_PP1BRLx2+vgtn?pbUf*@`Ram;|+-T!BMUyuTX+p+F4Yj)DZIGfzgU~#cp!i=?2-*xq33-hRGk$=$u z+Za*@d%pp*s{K#R0rs~?hmX$k&AonVp6Xa0D3C%kel^$eyu=}M;wK~vXr3F{;8Ip5 z%-IX?74k~S4@$)7CRcJDh`t^8{;M3=p*m|^ou-7x@X40m}Zl%i%DpKG4)usSOrF4 zT%g+umEW_AacO0WY*wUih0zZMsAU+D69SBd9u=mHx|0`^X?RrG{HxV)hSe+pmKInORtd z_i;skl$4aXZ5wyzJN)2v z3oHpyYJ#BFIx*=rFQ3nlcrbyZUTDa?nlOK-`=h{judk&$+h3j+%I(uC@uwF#X7d~7UFt2 zU+UxN&tj{7zvtnmv|1UF5=C9`X+NVGZ}Z2i{fuUKRf>vg)YfWJ1?;ZO${*L<7I3~< zeQ()Qw^u%Ag=yC=Ql}AfkL2M68tbc4FF0D(@jTMeRhY@vCxibkhOu)#iMlar2D`fu zKZ`kRfMyAPB=ZXUn|f47&3JShHICM*nR}&LxITP4YnSc&vCI%-xosZ$-rKQUSym z-Mn76C}Gnwd>`xbJDLUk)jp_677H#@&#}1TYqn_$X-l+hsqqT`Se!f0{LroZis0>u zYcAg2&7({@E*|_CmcXY`Nol8+NaE=n(s$1~7FO(j)f~XxMx@DGNG(UXT)cY?->KpmU?qIb)5MojiHSkK$NoKXuBpwK!%y&gCYnP0N@DtE0S)+d+DTM zvL$gY7DoRbn<640WW!J9|JjFE+F7X#nE_>x^kc*mhCX1+;>KG}2Lizary}h54EYSN zNiF*w;3zG{a!g}MPAUf}Ld`&3LGh8WHq9DOdlOwUsI}&EC~&N$>H})S&RnHZj z;P@)=9mNwQZJ67ww8)*yb#YY@kITwy;YT_jb*9Ep>UEngk$u>eiHTZtnnaUKB6_TW zrS+|@f6@;+0^I0x6ABkUt(Tj`E3R)fL?pU>N9{k&%e&FtbEYUXfPe0 z!Tar{XAI7`>X(tLnOo!-c)2-c`m`xUf*8(` zPyO|aqP3zH2C~+-^$0i9G2dPIg#6v%zTaf|*UW>nke2ZX5g@(Z;@-<6uQx$a1$kRn z*2cQ1Ds!2|IqXKOX(|Ry$n0BynQgWmwE+`tA1qmr_O^FjS9Y2mR+sIjGGE7Chm@hK z`PGKB5cK=wHUYgkcDfOLQ9e9!vO&*sN4I7Y`<^bgB`|`9t3Qy|iBTCwAP{-Rg(|KL z?x^VGY5jspxWod>Qn$GCr>()Ot1C&0k0=z4J%zFp1D)czsem8>M31I4|2L!}n`V54 z69Es`cdO9<`-Ek`C=M^_+=MBm1>f1lJ)X zzdyB|6RZMbnd_S*D~I-1&pjj>qXZMa>XuwE;@YojFs?_~z1P+E76?nWa`CFyFYwn> zA;PdD$WBo~=6mzl*g9BEhO5bK42&HK!3U&>J{lKEnDBU&%CWV3Fr1MQ36cltT1i;s zBu4f94=wY6*wA;}Ufeh`7OdZ_-8w0DIycR@ z6=NaJAjiAU6qF(HH!$_X(=;-9bp-dZ6(eOI8<#2nf0;O|jcmnfvw$XkP0H2rE(XO) 
zMfIr$_8#S-Y~(t!;oGW^P`q_@cqhSpnA#*U%nsn*t|@Q&!S1cxg?KY!FW+vG&J}(- z;fE_Ec!sm9{(Ea=Ah-(A{pGKCxwfw%U;8QExXcyt_{>beZI!G|;&n%MWzNPYqVLyL zC1~Ynyq14XX2aA|>(ZlE5M9WFbKh1e#v`YJ!GD2}N9pOSQ|_~i}{7y zu2d8XpC7W+5APH(RjuF6XGrc$$5JicV(y`{&4Z`Jorlb-=LfRdz@6(@!`qY9vCmv1b)@!QV z|InB^&-kszpr_m5*_m9HrRS}_#zFFnjMQ@r71&NS=k+CR*tH$gAkdfPI{uIVFiXA8 zhpSE_00nTpL^#pBM(i4DeeT>Urm(^_+El+sI=%I~hnyB9;KB-|)U!jV#*cD;F&Ueiy%}!k-!hw!->%L? zR0MB*n?Oe&)ejfhbz`lBglSBi@n+>qEuHkdroXf{cbw9`UEH{P6HGju{L;>Bb8G4T zuz_b|ILmM;t!~fT#$}6%mSuJ2DAJGZo*U z|GbsOE#^N#WmBpdJv_oJ6eJ(>(TF7H4P-um*tyc(yu*Yi@aBM0e<#)=Fi5frz~_?g zyrVRFB`wUXI&4h(MbOLEG*1}=vC-!pG)71k+18}^H<`-QH|x2kOZavj)-4%IfluV? zGBdtzG%OSb>ADiHo^w96`>vml4}1Plk{Q@jE2%HRX-Pg;Jv1xv`nxLDAqvRRxi!)#c0mYfNst>?VKZjq7d0eDWSod9Kkc zcg-y(G3fB<=+NM4L|((Qh2c4#{K1D-Yko*!99@UJ3L{HVK<0KPee za`8cUh0ouqxATkINH@F(fJlf$cZ;uw-Lgic?(Kp|_oBr@{x>DNvFRzAgWy58GN48$ zA%FoI`&85G3qV?7e~>L_s9hsd{{DeQ+UUI|Vp}G=vq@#h$BKlklY;Dkm_*wn^&*u^ zhs`>T46&^3AWkkLt*SiIft=hffm2r+A)TY5d46omG(02_){PsAGgeiBM&NQko)y87 zIl(1G4o`}hSVOH(?M_~)0}fPfd?tm2O^E_5x1nQwl(%OhG%`Jsfu?9WelGN;xr6tW~M`N@-Ukkm#<@QT-&yMPf$1ybALIfYkDM9==Uq<3uON5zPvBN zNIZS?QD_BiS>ZkSIfHX107gRIjZlk|bZ@=)Bp0ky)XdFUY6~bv6a1$y3PP~L6V60( zS(uMha7?Lk8{-j6$K7s4CYXHdDvsE%O?tiqef7szE&_B&F+hF9Y!P(1;KL%9?Noty zPK}aRag6)Q0s>nVrBJ0m_!?Rru#?wrlbe%sczF27I9FrzvTtq0_ldVb5B9CNnvMC8 zR-tH#jU{hBWL09}_D=5KUDnZ}f^wlXKQU`z3g1x~+!>y5Ynbn_+>NDf( zNnGr20>eyZTkZBbQ*~w(alru#Oug6@)+sbf4{Me%G(n4E{v&_7sn4fv3_#Hp3osux z>Sb@8OIye@_Ij$3KdzXnZgGPasA#O)Y!c_*_$Bx|=EOnsrMj(zZErBOHs*{v_g~An z3_-W}$Pw0Q8e83r7TQ1!kgd9Z$1-A9ZU)a_K|I2Y*s3b}@<%?yk9w=H3uceBpCO|g z4p(hspk!||f&-_?X1(9e^KKMp>V$wz`awWu{k&4OwsNK$XG0sR$)fO=YSe7yifq(C zk%meu`B$l>PKJv)lVICbv`pY9%hG|LNU8igK_F*XTzWvM<_`I%ux5y{G%KAl(v> zJ7N!*)2v%*LLZ(Cbj}h!%-C}Q5FXQVJ=b^Uw(Z0w$x}~AR@ocqu`}eC8VDIT)%^h= zh#o13QVTz!dgN{ptE=+rypb#Uy`Zlv)dN=iOkp8^V@ZVYTU^QPw|ZXkhvZ_G9LTTw zm-c_|{vKf5m&iNA;RfVG-PM!LPs+S{_DADd)@BJeX!KTDfY7&{1^-DU_O0P8gt!8Z zZPpNynMSj1v37S~xs3Kl^1!%?foyREsi$Sk`*=#*`&j#;=30XgPGtgoQuvR4B}6D) zTHte1glXBx>9Yjsf|El*ncP}b+?wTPvr-*xJql%8Ux}+;ib+o!6k+nJpw)_$=*{z% z)KK8I`@Fg#EhjB4Eti|N@nUsaCZ| zxN!zPmG;Rxh`*Ere|LzJ9b6-uI`axF9 zZoZP1_)pQ%O$IBp$jfA^AFUB#os6BQV-SH7aX7z9)04yU0IlM(AsJT~Yw3#yJ#Iq8 znbFk+3BK6{?f2m$H~S?$9dne-RD0_rj2iY zUGD6}`qo~XrL`a;%+M`zE%!Z!9j7Dt4Q%1r!3Sa6^d~)EoBac|7w}N)pFt>KKWc=e zd<@Pahy63&p&{jFJ8J|3j1_yEh#j^s+jC?%-5#&}x_>aX@O;5U{3asQnLm3W*WP~9 z8Glx`R$7eIxtPdTGUh`f0;ckfQj1LOiAry+tMK31q-m@e33YcjSAjTY*$LbxS9B=}K4FE&6Me;*uMz3J9uh^4kRn0Y&mI+4GV)XDt7iD*XnuSf-zC>DPK^tyANZedS+&FzI_b&W;00wo#y zc0BSNG`DnB9(1Dh6n>(iii&9Kj+>UF?&ZT3ZY5pRFbjj z^TB8g{_#~^xvI?k0!!j0cIEF0E{h4le@qfkvKG%~~QQ$4i+y$<2T=UP2Q#0kFZ94W`VnUw(zp`HNV9=UXGpNqF@ zz>(xnI3IT%?$F$oLwidkwpavWR?6-~2_@D=n|B2nfzlSp1y} zHd9pHUS{Kf)javFGr?Hjz~Rrq|2(c% zqFB<`Hr_nb5afKu9CZ^F#v`P&MR9Ys^Jqe#2&8K~P4*_KIfsLCO-ltT&&`YMrM9+$ zGf`r%j%c5o&ft_|3;tTUH7iJzBcj_V9tG32gdugDELFI>XU85W@(7P|%w#SFUtKJ; zi0d1WJ`4H5&0#BKo3M^VZkV$}-gq4$8a46&>XF@~quTsthQ_p@)*K-;}qbSf6!|UbfbCX2J5HAB>z0 zH?pIjrp2NoMGy&v{cj|*R+>I50Q=T%1;{Z5Y|7j;rBijZxI9XIcIyPl?4!>IJ3yQj z$XVZFs7`l+#J2A#Hxf`_hkO+3(7U@8`m+N1C%+> z>XF6-V#s?hhpdvhTY6#2ute2q&g8i(|D;+yHRoAhUOamErDXLLo> z`vRex?NX&^H0mldbXFc-={f$NunQBX9!GDwPRR#-Z^lRIAGpC0%7RKq#d|Q!^!4qf z9F;bC7;1h&1{6B=OH)FS)hjC`awCu~5~3gg7O_#nr=HmIqw9SDmgzlwMHRAeZVK%3 zOUxdMMy3Bt{!-GYYv%NNR*=L;K$wKbuA)fCB%B4w_-~#p3BbBMML`1>m>;dSXmg`{ z2XNG{;28n~QcY*PE3Ah?CGfQ>rq+!$#%{(JKfmti2xzY6y--9vefbBwnNT%1*+yHK zJ6l!&NH2A&w^hRqeHIA}#)YIL*eG%q)=i3O$W?WyR^PYBrP?8=IMgNpsv zZg5nF2b8X&OP*tT2VoxsCzwC7+c5Iq9 zC_$mKw{!M3qh-Sx5{JF1N;rv78B2*+b;v z2}3!x>CuvfdV07g39ryfC&Di$3d;^-UV)2jPEYh|RvyC2`6U{}YU<<=hb}Hf?A~45 
zB;6ISXNc~;N2pcpwefJz^9ajf!Ix1R@xSuZpp$7qO98>hy@{7Al95I}8J4pyv6Z!M z2${pmyS}mWEAyMxLDL@djDuj8OBub(7XvqplKxq0h&uC!@prsubeB6~H{@g1*EwUN zuipUMag$9*bs_B}=BGLKq)7N8ha>QvYuL&C5iZ-t@?U_UlAIh;scKE=?b#P&kTptlIk3ERvYaf@Nn8RLEl;!hD19%AK$)SX+c`=AD?q6?Z z7weO`^Ile9p3!IksnQhV=>PvFSESIUY2B}0KP+o@ekt| zJ4a;jKtu)uLoLB&oJU!b7hf5%#!fj}i7_4YWGmmPy)`Iuf#kxYOjA^Q+vVL&;iX^w z;}+T>7hJqrGp&1nC_oD>Z>CFsDK%YwuJV?zQ0@Y^XCB8nfJsaS1%BJ{}8m2_{Q=Hf;FKR%(C#) z)|js5P0(7@*2m|NNXOwRBv(A+e_w5pg}a%9AU#Z`US?>IS?bo-sTI8`iG6A4SlOgg1r$N!A?q4Fj?xn~jK_N9JVo_G3%F?~ zObzm?RZ2Oz3;VZ4SJ65MyyrvHH2ZmYhZBPn@?AP;A+T1NTkwu|;p6-L9_o`H z%y9rz=X}?^uNm|nxs{{eMn(#KzozXYyjjN_Ae{e8YvGU$mK=ZZr;sUZ-Nm9VzK@0Q zSxl9)|9C!HQ1E-b=F|+8t1~mcA#XQ|wI>PRcL~YY?ars@zC0WjU1D4HffVMK$vMsg zqj-&W%5(+`j2mM%U35J2t*Ql!y2-;&YbMcASZH{4Wg$vZg$~d?mbBoiuQ31YyqUhn zpD4^UoOUGZAds`ds;ne`-=Cnh8c@oNQy@jvx$%x^{WMon3=*By9t?xgH)Y z81J0iy;`RuElN4QTZB^knKS65t%CAV7=JRhBWbmMZ8+iyujleAcIrExw^LHyY^sBjwBMT><9O_lhnycFywkNDK+(Q|b>#Hrf$3nNce}8fu&)}Jx-_nNcM=bJOT*R7@U0lgKfC?^pvLo}%?pYRo zHrNxx1t|c{8*KyQSAbSweh1#|-~G?t3Dh&|FFU*53~o4;#m@9Cl#Y(_FHGg9@6vF; zSRa;<8t)>TdzZU*QZ--h&%jWxb!yNS@K9{OXb=Dxz&8t8awWHaH)TFFE8jOuH5BC- z=Q2N-;-fd9JfAJuU=_{vy2X@V8IFJ!F&7F7k3Uln>v=1iJDR4bzjq=*j{UUM^-*(5 zFBCQ7eZndvtT0;2Rv=N|X9yBV+#c)wr-+ENdYm{$7Kj$VnJ;U0e%Np~ z)C717+cuLLwLkU*etu&XSlIUYJKx;HQU`UstSfE8x!Ttj$($FkPh$z4#Bu)MyPC~} zZMRX=$8ESd#})>DUYnu>l991EOn9WL_4-9VuCZG$Za~*RN7z8r`VVM(GcX#M?<$&7 zR2U=jjL&R3)_B;D4~Axmc2I*BNfh}q7DFk}h3n;>h)(j77^n`I2(JGq!p?}R_r&8#b55F{ zG{Tx+zfUmQKp1XsE!Ott365&29PHCPKhN5@^G#2?9u{9xTLZ}--znm(SUfy1K?-|d z9)i#wrRFd~K3h!`uHi-+&hOzhV$2b~tXCei;#)S;p%T9UU10I^*$TuJf`+#03@t3B z|Go0#wtK1IdwDpF#b%ejcUV0>JkNUCZmI8W&As#)S82rM}#^aW9V&SetbP-evD?UydWQ#UE6MROf;t#5dI*o6n_2;F6~K zaNc$!6>dap*ke4AObDR|#rNLdr+vtIxqvIR!`s0$(!T}|-K&`l8=NY<1+%#GW_Ddn z*#W?HCYo=G8oTxD!~b0@wh*7Bn5P4lZBMGu*q$_L}|ui2|D&bb?`V z16yAz_ne9}V0?wXzbV9~wPtPGP`-Fn;2ucN6ICW_ob~R-;jD4Y*3ols2r<4enxR8F0?*R`$dcO(q;R1&YGdUan9%W#)HDW73dT1see)vAg?==7XgLl3y;oRZEA&Q*h@`#&n^LOxrp!PEM8Vwerqxc&;|A$SLE8 zNpyJ9s|Rugd{5MEiJ!YDD~#Y~aPvPn>?kO%7)^`|FaRf-! zELM*I%&iZ=i-h8F*q7Vu%oyX!1@sU0syYzY$2)79&;&}#D)Wx!4(Z%h@f5s_+JZx0 z)iLPja`ey05k-WpC1>%A`g*o5H7WYHPh-FXP|7T-KlhIS z>J#g8tc7Qr!?<`xi#|whu55&-aVi~PRbtW@CqB%0#|vsq)IGtSSAkT(D3sW-K<_Dz zQe8&`%oTal{JfhRaDubDKPR>aP%M*$F9#9D#=dYW{GzkhJmTqWv%#=*>-#9)v-3_E zxgQ?~ocRDw_DH;N-J;I(VsAWQ0!dMP3z^;cR6P5z(zPQyX13LJHokL-w}U}@OlBs? 
zmr9&Krp}sYXN2D^a?qT`r_BG}=l3_SeZ{d+r={TYS4(7$QyY`<>2O4lBXIx7+5Y-P zW)~(tpzzg41~$*E|H(mx<cK4cr&$T+>l`w%8;`f8&+nhPVTLS`{>BD)$--#~< z1-`(P5biV^t&e}2Vb$X;LJD0_1fNn*kEKne)hNh}-_$D=7Qk?qR$R_9oN^7vuBr)2 zjwNS+jnMih{J!J>L5lV#@vf?P$?`z1HmW?k>>y5UMuBh;+auP{Uic7U-fz$=z_t+; z+n@9fGN8SF9ONewO-+!2YdK)CSoma`E%fno&3~`;G@!)nf;s|65_W*dS5xvPQV3f` z0XFx?(d;;~2S;hvbf?e^y;Qb_G#p!&ts6}BKgsX3bz=VTAi01Z+-vo3xeK@Q4os2Ff{#gqBC7iXlBK1k32)P2AV`QYwQGjK~%Zn&_27|o9 zm*U>QtWp82DGwNQyF-wq`tV6eO8|HFf$uOl@NFdV_2IgL)_d+fN=vF|;zGV3@C( zXK}fCkhd}BT1=S9g)k4118#(69ShG~y!LkO@RIXyb@rQ-vyl=~oz3Vs6SO$sX)t}) z`!?iqGxEk{<+a<%9rDP1s;b(j{=#A|uX5x=rTeLq0J z;Qj{%l||&jQqR@6Il7k}mfC;dJJ>t(FV8L*Gfm1o<;@HFdsbe7<_QPMGyd$fr{~h+ z{$NP>@H5EKY{sswyI)ozp6usm>Qo|f_!u$?nW+h~5lM0lRioqm6#Z9WU4efh2X2b5~*T(ezyT z^mtTKJ(>7=xk^MLtOA_jot&`3O8edLS;2%fKc=5Fdr?`6mD{qiq>x9X5H>PBct3_G z{Ud54Uy&mmsb(3j1lxF%A&m8mv0?=89Ap`?$jOWrZ>?e1a_@I?smEDD{GZ9>Dkys@ zPrMC5fWC>QdGqZxQ#j%~tW+8NIoOleGG6UWiq62j^S^mctZ^shbW}h0zz3GxTgxDb4JTRa^XyD*Pyaxjo#sVLtvK=lN8@t#z?2e?ew#^k1#T%4zYgS6#Zdb{#G#SV%j~0lMsKd-auN_i=}gzfIeKOYF=Lyc9c57T#g+B z3%ax|E-UrYx$={b_d;KrkXMhc(E?6hvuldy&!IOL!0R3oB|AzZzl*wuK$ViJ`OmwT-{LB4ErKW>0A5uoKSZ3>Jq7zMR=R!k`zqDe+hB9!9Y(mTN?mC2=fiP* z%Q*1(-#_UDHboqp2}eDDs8Os7{6hU4B&=59SVO3xME0l*eB+zzX3DE!>b<^G} z+Xqei+3aF}uSyZo^vad7`4ZHXrB`I_Jv1NBFbp|o&!Bg8>%ZGSzt1b%R=(c_PKQ^)&Bo0T`~5d789zsz z;imZl?29>Z_&4li z{cZ})nn&f6d5tCbYa-qccyrLb?p0kGn3*@S6%jM>*2sQ`>w_C1nMozSMrPVby!;NG zW5O5%$EFXQU(^TEZsT(Z;89*OO({)dA|jDOV2UutH0L!0T9#!CKHvIwD*hMVPR+&sA^TaEY;v@-u7>~ zl@mBs$@4si5Zp9$v!I`acAv&>s5xhEHRxECVHo_-dQBa?q|WKp+q{v$#28EVYI}1L z;k$^Dysj~XFpeoEGAIDXXeN~dMsm~ELS#lTkQAbs1fu9IYt;O-}QQGJ-_R+hI zKn8ZQv&lmd@rSN0gv&j>;$}B9yP)8$C$u%M|-ex+&1`P%7k4osmsg*`qu}%T-5J(Di$;b` zeeA{Z{@C{W^th?2x_I~V=r3F&^v`smcF3cJBsCHK&xxdn-Th7Zc)(xk=Ooq zeO_*Uql?kKe*M{R?FYU8%zh1C>ZI#UdQ~XCv@d@4^8jF5wDR*A`T5UlQ;_cd!iHr4 zXyIaJ=Uew5rPh^pcW-Oh2Y0DIvyWQu9_+U@ZFGVF;AK$&0Ikyi8-N$^wd&-XyBXjW z35+I�KV_BMjKKM1h(iY$c5oxr_I>SE%v z?ydk^_|ExE_ON!B%WXg};aB@Td?P+?PRIWB=NolL!s@{oPj5S@3Pg;=g{jm2p(zR? z7%+m05iZbrDlQBe2HN&jxBrgjJOU#LWUUsb(ZV>_SaMN}a2OS`ko1>$Y}4L|cXyikTS# zDj9&~S}m|u6{rdrhscljcc;rLDpF)ESz&F(Mre=6y8(!fgd9w96)RO0fz+6>6A-#v z<3#BD)$FRyyB&A1MPJ-)1K%!gcc<*y_KtRjW0ze`$BWg=nlI=40r75}(l88T496jg z5VHZ*T9+joQzT^aS|fmsqQI<)iEYan0I07ynjwf_QBxBtIS~>8GATe2R5K$ZMK%C! 
diff --git a/modules/image/instance_segmentation/solov2/module.py b/modules/image/instance_segmentation/solov2/module.py index 520300319..eaf8616bc 100644 --- a/modules/image/instance_segmentation/solov2/module.py +++ b/modules/image/instance_segmentation/solov2/module.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import time import base64 from functools import reduce from typing import Union -import cv2 import numpy as np from paddlehub.module.module import moduleinfo, serving @@ -25,7 +25,7 @@ import solov2.data_feed as D -class Detector(object): +class Detector: """ Args: min_subgraph_size (int): number of tensorRT graphs. @@ -33,23 +33,26 @@ class Detector(object): threshold (float): threshold to reserve the result for output. 
""" - def __init__(self, min_subgraph_size: int = 60, use_gpu=False, threshold: float = 0.5): + def __init__(self, + min_subgraph_size: int = 60, + use_gpu=False): - model_dir = os.path.join(self.directory, 'solov2_r50_fpn_1x') - self.predictor = D.load_predictor(model_dir, min_subgraph_size=min_subgraph_size, use_gpu=use_gpu) - self.compose = [ - P.Resize(max_size=1333), - P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - P.Permute(), - P.PadStride(stride=32) - ] + self.default_pretrained_model_path = os.path.join(self.directory, 'solov2_r50_fpn_1x', 'model') + self.predictor = D.load_predictor( + self.default_pretrained_model_path, + min_subgraph_size=min_subgraph_size, + use_gpu=use_gpu) + self.compose = [P.Resize(max_size=1333), + P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + P.Permute(), + P.PadStride(stride=32)] def transform(self, im: Union[str, np.ndarray]): im, im_info = P.preprocess(im, self.compose) inputs = D.create_inputs(im, im_info) return inputs, im_info - def postprocess(self, np_boxes: np.ndarray, np_masks: np.ndarray, im_info: dict, threshold: float = 0.5): + def postprocess(self, np_boxes: np.ndarray, np_masks: np.ndarray, threshold: float = 0.5): # postprocess output of predictor results = {} expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) @@ -57,14 +60,17 @@ def postprocess(self, np_boxes: np.ndarray, np_masks: np.ndarray, im_info: dict, for box in np_boxes: print('class_id:{:d}, confidence:{:.4f},' 'left_top:[{:.2f},{:.2f}],' - ' right_bottom:[{:.2f},{:.2f}]'.format(int(box[0]), box[1], box[2], box[3], box[4], box[5])) + ' right_bottom:[{:.2f},{:.2f}]'.format( + int(box[0]), box[1], box[2], box[3], box[4], box[5])) results['boxes'] = np_boxes if np_masks is not None: np_masks = np_masks[expect_boxes, :, :, :] results['masks'] = np_masks return results - def predict(self, image: Union[str, np.ndarray], threshold: float = 0.5): + def predict(self, + image: Union[str, np.ndarray], + threshold: float = 0.5): ''' Args: image (str/np.ndarray): path of image/ np.ndarray read by cv2 @@ -80,12 +86,12 @@ def predict(self, image: Union[str, np.ndarray], threshold: float = 0.5): input_names = self.predictor.get_input_names() for i in range(len(input_names)): - input_tensor = self.predictor.get_input_tensor(input_names[i]) + input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - self.predictor.zero_copy_run() + self.predictor.run() output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_tensor(output_names[0]) + boxes_tensor = self.predictor.get_output_handle(output_names[0]) np_boxes = boxes_tensor.copy_to_cpu() # do not perform postprocess in benchmark mode results = [] @@ -103,16 +109,18 @@ def predict(self, image: Union[str, np.ndarray], threshold: float = 0.5): author="paddlepaddle", author_email="", summary="solov2 is a detection model, this module is trained with COCO dataset.", - version="1.0.0") + version="1.1.0") class DetectorSOLOv2(Detector): """ Args: use_gpu (bool): whether use gpu threshold (float): threshold to reserve the result for output. 
""" + def __init__(self, + use_gpu: bool = False): + super(DetectorSOLOv2, self).__init__( + use_gpu=use_gpu) - def __init__(self, use_gpu: bool = False, threshold: float = 0.5): - super(DetectorSOLOv2, self).__init__(use_gpu=use_gpu, threshold=threshold) def predict(self, image: Union[str, np.ndarray], @@ -125,7 +133,7 @@ def predict(self, threshold (float): threshold of predicted box' score visualization (bool): Whether to save visualization result. save_dir (str): save path. - + ''' inputs, im_info = self.transform(image) @@ -133,20 +141,23 @@ def predict(self, input_names = self.predictor.get_input_names() for i in range(len(input_names)): - input_tensor = self.predictor.get_input_tensor(input_names[i]) + input_tensor = self.predictor.get_input_handle(input_names[i]) input_tensor.copy_from_cpu(inputs[input_names[i]]) - self.predictor.zero_copy_run() + self.predictor.run() output_names = self.predictor.get_output_names() - np_label = self.predictor.get_output_tensor(output_names[0]).copy_to_cpu() - np_score = self.predictor.get_output_tensor(output_names[1]).copy_to_cpu() - np_segms = self.predictor.get_output_tensor(output_names[2]).copy_to_cpu() + np_label = self.predictor.get_output_handle(output_names[ + 1]).copy_to_cpu() + np_score = self.predictor.get_output_handle(output_names[ + 2]).copy_to_cpu() + np_segms = self.predictor.get_output_handle(output_names[ + 3]).copy_to_cpu() output = dict(segm=np_segms, label=np_label, score=np_score) - + if visualization: if not os.path.exists(save_dir): os.makedirs(save_dir) - image = D.visualize_box_mask(im=image, results=output) + image = D.visualize_box_mask(im=image, results=output, threshold=threshold) name = str(time.time()) + '.png' save_path = os.path.join(save_dir, name) image.save(save_path) @@ -163,4 +174,4 @@ def serving_method(self, images: list, **kwargs): final['segm'] = base64.b64encode(results['segm']).decode('utf8') final['label'] = base64.b64encode(results['label']).decode('utf8') final['score'] = base64.b64encode(results['score']).decode('utf8') - return final + return final \ No newline at end of file diff --git a/modules/image/instance_segmentation/solov2/processor.py b/modules/image/instance_segmentation/solov2/processor.py index 0de87f5a6..a5e4c9d85 100644 --- a/modules/image/instance_segmentation/solov2/processor.py +++ b/modules/image/instance_segmentation/solov2/processor.py @@ -78,13 +78,20 @@ def __call__(self, im, im_info): im_channel = im.shape[2] im_scale_x, im_scale_y = self.generate_scale(im) if self.use_cv2: - im = cv2.resize(im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=self.interp) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) else: resize_w = int(im_scale_x * float(im.shape[1])) resize_h = int(im_scale_y * float(im.shape[0])) if self.max_size != 0: - raise TypeError('If you set max_size to cap the maximum size of image,' - 'please set use_cv2 to True to resize the image.') + raise TypeError( + 'If you set max_size to cap the maximum size of image,' + 'please set use_cv2 to True to resize the image.') im = im.astype('uint8') im = Image.fromarray(im) im = im.resize((int(resize_w), int(resize_h)), self.interp) @@ -92,7 +99,8 @@ def __call__(self, im, im_info): # padding im when image_shape fixed by infer_cfg.yml if self.max_size != 0 and self.image_shape is not None: - padding_im = np.zeros((self.max_size, self.max_size, im_channel), dtype=np.float32) + padding_im = np.zeros( + (self.max_size, self.max_size, im_channel), dtype=np.float32) 
im_h, im_w = im.shape[:2] padding_im[:im_h, :im_w, :] = im im = padding_im @@ -232,4 +240,4 @@ def preprocess(im, preprocess_ops): for operator in preprocess_ops: im, im_info = operator(im, im_info) im = np.array((im, )).astype('float32') - return im, im_info + return im, im_info \ No newline at end of file diff --git a/modules/image/instance_segmentation/solov2/test.py b/modules/image/instance_segmentation/solov2/test.py new file mode 100644 index 000000000..b3dfd416e --- /dev/null +++ b/modules/image/instance_segmentation/solov2/test.py @@ -0,0 +1,96 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/7799a8ccc5f6471b9d56fb6eff94f82a08b70ca2c7594d3f99877e366c0a2619' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="solov2") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('solov2_result') + + def test_predict1(self): + results = self.module.predict( + image='tests/test.jpg', + visualization=False + ) + segm = results['segm'] + label = results['label'] + score = results['score'] + self.assertIsInstance(segm, np.ndarray) + self.assertIsInstance(label, np.ndarray) + self.assertIsInstance(score, np.ndarray) + + def test_predict2(self): + results = self.module.predict( + image=cv2.imread('tests/test.jpg'), + visualization=False + ) + segm = results['segm'] + label = results['label'] + score = results['score'] + self.assertIsInstance(segm, np.ndarray) + self.assertIsInstance(label, np.ndarray) + self.assertIsInstance(score, np.ndarray) + + def test_predict3(self): + results = self.module.predict( + image=cv2.imread('tests/test.jpg'), + visualization=True + ) + segm = results['segm'] + label = results['label'] + score = results['score'] + self.assertIsInstance(segm, np.ndarray) + self.assertIsInstance(label, np.ndarray) + self.assertIsInstance(score, np.ndarray) + + def test_predict4(self): + module = hub.Module(name="solov2", use_gpu=True) + results = module.predict( + image=cv2.imread('tests/test.jpg'), + visualization=True + ) + segm = results['segm'] + label = results['label'] + score = results['score'] + self.assertIsInstance(segm, np.ndarray) + self.assertIsInstance(label, np.ndarray) + self.assertIsInstance(score, np.ndarray) + + def test_predict5(self): + self.assertRaises( + FileNotFoundError, + self.module.predict, + image='no.jpg' + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 02d7e5514b0da9a7ebabb004533b274056c954e2 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:48:48 +0800 Subject: [PATCH 099/117] update stgan_bald (#2022) --- .../image/Image_gan/gan/stgan_bald/README.md | 7 +- .../Image_gan/gan/stgan_bald/README_en.md | 7 +- .../Image_gan/gan/stgan_bald/data_feed.py | 22 +++-- .../image/Image_gan/gan/stgan_bald/module.py | 76 +++++++++------- 
.../Image_gan/gan/stgan_bald/module/__model__ | Bin 358737 -> 0 bytes .../Image_gan/gan/stgan_bald/processor.py | 15 ++-- .../Image_gan/gan/stgan_bald/requirements.txt | 1 - .../image/Image_gan/gan/stgan_bald/test.py | 85 ++++++++++++++++++ 8 files changed, 163 insertions(+), 50 deletions(-) delete mode 100644 modules/image/Image_gan/gan/stgan_bald/module/__model__ delete mode 100644 modules/image/Image_gan/gan/stgan_bald/requirements.txt create mode 100644 modules/image/Image_gan/gan/stgan_bald/test.py diff --git a/modules/image/Image_gan/gan/stgan_bald/README.md b/modules/image/Image_gan/gan/stgan_bald/README.md index 1d504093b..6480f05b9 100644 --- a/modules/image/Image_gan/gan/stgan_bald/README.md +++ b/modules/image/Image_gan/gan/stgan_bald/README.md @@ -129,6 +129,11 @@ * 1.0.0 初始发布 + +* 1.1.0 + + 移除 Fluid API + - ```shell - $ hub install stgan_bald==1.0.0 + $ hub install stgan_bald==1.1.0 ``` diff --git a/modules/image/Image_gan/gan/stgan_bald/README_en.md b/modules/image/Image_gan/gan/stgan_bald/README_en.md index 7146b1f5b..37503dcfa 100644 --- a/modules/image/Image_gan/gan/stgan_bald/README_en.md +++ b/modules/image/Image_gan/gan/stgan_bald/README_en.md @@ -128,6 +128,11 @@ * 1.0.0 First release + +* 1.1.0 + + Remove Fluid API + - ```shell - $ hub install stgan_bald==1.0.0 + $ hub install stgan_bald==1.1.0 ``` diff --git a/modules/image/Image_gan/gan/stgan_bald/data_feed.py b/modules/image/Image_gan/gan/stgan_bald/data_feed.py index 5626f02ec..c5d84c9db 100644 --- a/modules/image/Image_gan/gan/stgan_bald/data_feed.py +++ b/modules/image/Image_gan/gan/stgan_bald/data_feed.py @@ -3,10 +3,8 @@ import time from collections import OrderedDict -from PIL import Image, ImageOps -import numpy as np -from PIL import Image import cv2 +import numpy as np __all__ = ['reader'] @@ -26,27 +24,33 @@ def reader(images=None, paths=None, org_labels=None, target_labels=None): if paths: for i, im_path in enumerate(paths): each = OrderedDict() - assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + assert os.path.isfile( + im_path), "The {} isn't a valid file path.".format(im_path) im = cv2.imread(im_path) each['org_im'] = im each['org_im_path'] = im_path each['org_label'] = np.array(org_labels[i]).astype('float32') if not target_labels: - each['target_label'] = np.array(org_labels[i]).astype('float32') + each['target_label'] = np.array( + org_labels[i]).astype('float32') else: - each['target_label'] = np.array(target_labels[i]).astype('float32') + each['target_label'] = np.array( + target_labels[i]).astype('float32') component.append(each) if images is not None: assert type(images) is list, "images should be a list." 
for i, im in enumerate(images): each = OrderedDict() each['org_im'] = im - each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_path'] = 'ndarray_time={}'.format( + round(time.time(), 6) * 1e6) each['org_label'] = np.array(org_labels[i]).astype('float32') if not target_labels: - each['target_label'] = np.array(org_labels[i]).astype('float32') + each['target_label'] = np.array( + org_labels[i]).astype('float32') else: - each['target_label'] = np.array(target_labels[i]).astype('float32') + each['target_label'] = np.array( + target_labels[i]).astype('float32') component.append(each) for element in component: diff --git a/modules/image/Image_gan/gan/stgan_bald/module.py b/modules/image/Image_gan/gan/stgan_bald/module.py index 5d8328570..1a017f6f6 100644 --- a/modules/image/Image_gan/gan/stgan_bald/module.py +++ b/modules/image/Image_gan/gan/stgan_bald/module.py @@ -13,17 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import ast import os -import argparse import copy +import paddle import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.module.module import moduleinfo, runnable, serving -from stgan_bald.data_feed import reader -from stgan_bald.processor import postprocess, base64_to_cv2, cv2_to_base64, check_dir +from paddle.inference import Config, create_predictor +from paddlehub.module.module import moduleinfo, serving +from .data_feed import reader +from .processor import postprocess, base64_to_cv2, cv2_to_base64 def check_attribute_conflict(label_batch): @@ -45,40 +42,43 @@ def _set(label, value, attr): @moduleinfo( name="stgan_bald", - version="1.0.0", + version="1.1.0", summary="Baldness generator", author="Arrow, 七年期限,Mr.郑先生_", author_email="1084667371@qq.com,2733821739@qq.com", type="image/gan") -class StganBald(hub.Module): - def _initialize(self): - self.default_pretrained_model_path = os.path.join(self.directory, "module") +class StganBald: + def __init__(self): + self.default_pretrained_model_path = os.path.join( + self.directory, "module", "model") self._set_config() def _set_config(self): """ predictor config setting """ - self.model_file_path = os.path.join(self.default_pretrained_model_path, '__model__') - self.params_file_path = os.path.join(self.default_pretrained_model_path, '__params__') - cpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + model = self.default_pretrained_model_path+'.pdmodel' + params = self.default_pretrained_model_path+'.pdiparams' + cpu_config = Config(model, params) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) try: _places = os.environ["CUDA_VISIBLE_DEVICES"] int(_places[0]) use_gpu = True - self.place = fluid.CUDAPlace(0) + self.place = paddle.CUDAPlace(0) except: use_gpu = False - self.place = fluid.CPUPlace() + self.place = paddle.CPUPlace() + if use_gpu: - gpu_config = AnalysisConfig(self.model_file_path, self.params_file_path) + gpu_config = Config(model, params) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu( + memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) def bald(self, images=None, @@ -135,19 
+135,29 @@ def bald(self, label_trg_tmp = copy.deepcopy(target_label_np) new_i = 0 label_trg_tmp[0][new_i] = 1.0 - label_trg_tmp[0][new_i] - label_trg_tmp = check_attribute_conflict(label_trg_tmp) + label_trg_tmp = check_attribute_conflict( + label_trg_tmp) change_num = j * 0.02 + 0.3 - label_org_tmp = list(map(lambda x: ((x * 2) - 1) * change_num, org_label_np)) - label_trg_tmp = list(map(lambda x: ((x * 2) - 1) * change_num, label_trg_tmp)) - - image = PaddleTensor(image_np.copy()) - org_label = PaddleTensor(np.array(label_org_tmp).astype('float32')) - target_label = PaddleTensor(np.array(label_trg_tmp).astype('float32')) - - output = self.gpu_predictor.run([ - image, target_label, org_label - ]) if use_gpu else self.cpu_predictor.run([image, org_label, target_label]) - outputs.append(output) + label_org_tmp = list( + map(lambda x: ((x * 2) - 1) * change_num, org_label_np)) + label_trg_tmp = list( + map(lambda x: ((x * 2) - 1) * change_num, label_trg_tmp)) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(image_np.copy()) + input_handle = predictor.get_input_handle(input_names[1]) + input_handle.copy_from_cpu( + np.array(label_org_tmp).astype('float32')) + input_handle = predictor.get_input_handle(input_names[2]) + input_handle.copy_from_cpu( + np.array(label_trg_tmp).astype('float32')) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle( + output_names[0]) + outputs.append(output_handle) out = postprocess( data_out=outputs,
diff --git a/modules/image/Image_gan/gan/stgan_bald/module/__model__ b/modules/image/Image_gan/gan/stgan_bald/module/__model__ deleted file mode 100644 index 605addef56cab766ff83f7c084eb7f815a777d0b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 358737
[base85-encoded binary payload of the deleted __model__ file omitted]
diff --git a/modules/image/Image_gan/gan/stgan_bald/processor.py b/modules/image/Image_gan/gan/stgan_bald/processor.py index 7846df26c..782751e74 100644 --- a/modules/image/Image_gan/gan/stgan_bald/processor.py +++ b/modules/image/Image_gan/gan/stgan_bald/processor.py @@ -1,11 +1,10 @@ # -*- coding:utf-8 -*- import os -import time import base64 import cv2 -from PIL import Image import numpy as np +from PIL import Image __all__ = ['cv2_to_base64', 'base64_to_cv2', 'postprocess'] @@ -22,7 +21,12 @@ def base64_to_cv2(b64str): return data -def postprocess(data_out, org_im, org_im_path, output_dir, visualization, thresh=120): +def postprocess(data_out, + org_im, + org_im_path, + output_dir, + visualization, + thresh=120): """ Postprocess output of network. one image at a time.
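The module.py hunks above (and the solov2 changes earlier in this series) all make the same migration: the removed Fluid-era predictor calls are replaced by the `paddle.inference` handle API. A minimal sketch of that flow, assuming a generic exported model; the `model.pdmodel`/`model.pdiparams` paths and the input shape are placeholders, not the module's real files:

```python
import numpy as np
from paddle.inference import Config, create_predictor

# CPU configuration; the GPU path would call enable_use_gpu(...) instead.
config = Config('model.pdmodel', 'model.pdiparams')  # placeholder paths
config.disable_glog_info()
config.disable_gpu()
predictor = create_predictor(config)

# Bind an input through its handle and copy numpy data in.
input_names = predictor.get_input_names()
input_handle = predictor.get_input_handle(input_names[0])
input_handle.copy_from_cpu(np.random.rand(1, 3, 256, 256).astype('float32'))  # placeholder shape

predictor.run()

# Read every output back as a numpy array.
output_names = predictor.get_output_names()
results = [predictor.get_output_handle(name).copy_to_cpu() for name in output_names]
```

The stgan_bald hunk above binds three inputs (image, original label, target label) and one output with exactly these calls; the solov2 change does the same with one input and three outputs.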
@@ -41,7 +45,7 @@ def postprocess(data_out, org_im, org_im_path, output_dir, visualization, thresh result = dict() for i, img in enumerate(data_out): - img = np.squeeze(img[0].as_ndarray(), 0).transpose((1, 2, 0)) + img = np.squeeze(img.copy_to_cpu(), 0).transpose((1, 2, 0)) img = ((img + 1) * 127.5).astype(np.uint8) img = cv2.resize(img, (256, 341), cv2.INTER_CUBIC) fake_image = Image.fromarray(img) @@ -76,6 +80,7 @@ def get_save_image_name(org_im_path, output_dir, num): # save image path save_im_path = os.path.join(output_dir, im_prefix + ext) if os.path.exists(save_im_path): - save_im_path = os.path.join(output_dir, im_prefix + str(num) + ext) + save_im_path = os.path.join( + output_dir, im_prefix + str(num) + ext) return save_im_path diff --git a/modules/image/Image_gan/gan/stgan_bald/requirements.txt b/modules/image/Image_gan/gan/stgan_bald/requirements.txt deleted file mode 100644 index 00a00fcc8..000000000 --- a/modules/image/Image_gan/gan/stgan_bald/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -paddlehub>=1.8.0 diff --git a/modules/image/Image_gan/gan/stgan_bald/test.py b/modules/image/Image_gan/gan/stgan_bald/test.py new file mode 100644 index 000000000..05ca0bac8 --- /dev/null +++ b/modules/image/Image_gan/gan/stgan_bald/test.py @@ -0,0 +1,85 @@ +import os +import shutil +import unittest + +import cv2 +import requests +import numpy as np +import paddlehub as hub + + +class TestHubModule(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://ai-studio-static-online.cdn.bcebos.com/68313e182f5e4ad9907e69dac9ece8fc50840d7ffbd24fa88396f009958f969a' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="stgan_bald") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + shutil.rmtree('bald_output') + + def test_bald1(self): + results = self.module.bald( + paths=['tests/test.jpg'] + ) + data_0 = results[0]['data_0'] + data_1 = results[0]['data_1'] + data_2 = results[0]['data_2'] + self.assertIsInstance(data_0, np.ndarray) + self.assertIsInstance(data_1, np.ndarray) + self.assertIsInstance(data_2, np.ndarray) + + def test_bald2(self): + results = self.module.bald( + images=[cv2.imread('tests/test.jpg')] + ) + data_0 = results[0]['data_0'] + data_1 = results[0]['data_1'] + data_2 = results[0]['data_2'] + self.assertIsInstance(data_0, np.ndarray) + self.assertIsInstance(data_1, np.ndarray) + self.assertIsInstance(data_2, np.ndarray) + + def test_bald3(self): + results = self.module.bald( + images=[cv2.imread('tests/test.jpg')], + visualization=False + ) + data_0 = results[0]['data_0'] + data_1 = results[0]['data_1'] + data_2 = results[0]['data_2'] + self.assertIsInstance(data_0, np.ndarray) + self.assertIsInstance(data_1, np.ndarray) + self.assertIsInstance(data_2, np.ndarray) + + def test_bald4(self): + self.assertRaises( + AssertionError, + self.module.bald, + paths=['no.jpg'] + ) + + def test_bald5(self): + self.assertRaises( + cv2.error, + self.module.bald, + images=['tests/test.jpg'] + ) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 7cd67aba38c19a835c3229d9b4be21798c5c8673 Mon Sep 
17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:51:17 +0800 Subject: [PATCH 100/117] update efficientnetb0_imagenet (#2041) * update efficientnetb0_imagenet * remove unused print --- .../efficientnetb0_imagenet/README.md | 7 +- .../efficientnetb0_imagenet/README_en.md | 7 +- .../efficientnetb0_imagenet/__init__.py | 0 .../efficientnetb0_imagenet/data_feed.py | 93 ++ .../efficientnetb0_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb0_imagenet/module.py | 925 +++------------ .../efficientnetb0_imagenet/processor.py | 65 ++ .../efficientnetb0_imagenet/test.py | 63 ++ 8 files changed, 1401 insertions(+), 759 deletions(-) create mode 100644 modules/image/classification/efficientnetb0_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb0_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb0_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb0_imagenet/processor.py create mode 100644 modules/image/classification/efficientnetb0_imagenet/test.py diff --git a/modules/image/classification/efficientnetb0_imagenet/README.md b/modules/image/classification/efficientnetb0_imagenet/README.md index a1013ab01..b5a2ab256 100644 --- a/modules/image/classification/efficientnetb0_imagenet/README.md +++ b/modules/image/classification/efficientnetb0_imagenet/README.md @@ -132,6 +132,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb0_imagenet==1.1.0 + $ hub install efficientnetb0_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb0_imagenet/README_en.md b/modules/image/classification/efficientnetb0_imagenet/README_en.md index b9e7f8d6f..819448729 100644 --- a/modules/image/classification/efficientnetb0_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb0_imagenet/README_en.md @@ -131,6 +131,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb0_imagenet==1.1.0 + $ hub install efficientnetb0_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb0_imagenet/__init__.py b/modules/image/classification/efficientnetb0_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb0_imagenet/data_feed.py b/modules/image/classification/efficientnetb0_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
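The new data_feed.py continues below with the usual ImageNet preprocessing: resize the shorter side to 256, center-crop to the 224x224 DATA_DIM, scale to [0, 1], and normalize with the ImageNet channel mean/std. As a self-contained illustration of the same arithmetic on a dummy image (not part of the module itself):

```python
import numpy as np
from PIL import Image

img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

# Dummy RGB image standing in for a real photo.
img = Image.fromarray(np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8))

# Resize so the shorter side becomes 256, keeping the aspect ratio.
scale = 256 / min(img.size)
img = img.resize((round(img.size[0] * scale), round(img.size[1] * scale)), Image.LANCZOS)

# Center-crop a 224x224 patch.
w, h = img.size
left, top = (w - 224) // 2, (h - 224) // 2
img = img.crop((left, top, left + 224, top + 224))

# HWC uint8 -> CHW float32 in [0, 1], then standardize channel-wise.
x = np.asarray(img).astype('float32').transpose((2, 0, 1)) / 255
x = (x - img_mean) / img_std
print(x.shape)  # (3, 224, 224)
```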
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb0_imagenet/label_list.txt b/modules/image/classification/efficientnetb0_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb0_imagenet/module.py b/modules/image/classification/efficientnetb0_imagenet/module.py index ec8c27142..dc53c7a41 100644 --- a/modules/image/classification/efficientnetb0_imagenet/module.py +++ b/modules/image/classification/efficientnetb0_imagenet/module.py @@ -11,781 +11,192 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
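Note on the hunk below: it replaces the in-repo dygraph EfficientNet-B0 definition with a thin wrapper around a pre-exported inference model (`paddle.inference.Config` / `create_predictor`), exposing a `classification` API plus command-line and serving entry points. As a rough usage sketch only (the image path is a placeholder and the `hub install` step is an assumption, not part of this patch), the rewritten module would be driven roughly like this:

```python
# Hedged usage sketch, not part of the patch. Assumes PaddleHub and OpenCV are
# installed and the module was installed with `hub install efficientnetb0_imagenet`.
import cv2
import paddlehub as hub

classifier = hub.Module(name="efficientnetb0_imagenet")

# `images` takes BGR ndarrays, `paths` takes file paths, `top_k` limits the labels returned.
results = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')], batch_size=1, top_k=3)
print(results)  # e.g. [{'Pembroke': 0.92, ...}] -- one label->score dict per input image
```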
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b0': (1.0, 1.0, 224, 0.2) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb0_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB0 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB0ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb0_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act 
= act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - 
"""Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - 
cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb0_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb0_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B0(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B0, self).__init__() - - model_name = 'efficientnet-b0' - self.name = "b0" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b0_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x +if __name__ == '__main__': + b0 = EfficientNetB0ImageNet() + b0.context() + import cv2 + test_image = [ + cv2.imread( + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg') + ] + res = b0.classification(images=test_image) + print(res) + res = b0.classification(paths=[ + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg' + ]) + print(res) + res = b0.classification(images=test_image) + print(res) + res = b0.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb0_imagenet/processor.py b/modules/image/classification/efficientnetb0_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb0_imagenet/test.py b/modules/image/classification/efficientnetb0_imagenet/test.py new file mode 100644 index 000000000..9ce47d0c1 --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb0_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 3d2171c8e1252a7cb5957783a2aa3385d464eece Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:52:27 +0800 Subject: [PATCH 101/117] update efficientnetb0_small_imagenet (#2048) * update efficientnetb0_small_imagenet * remove unused print --- .../efficientnetb0_small_imagenet/README.md | 6 +- .../README_en.md | 6 +- .../efficientnetb0_small_imagenet/__init__.py | 0 .../data_feed.py | 93 ++ .../label_list.txt | 1000 +++++++++++++++++ .../efficientnetb0_small_imagenet/module.py | 924 +++------------ .../processor.py | 66 ++ .../efficientnetb0_small_imagenet/test.py | 63 ++ 8 files changed, 1397 insertions(+), 761 deletions(-) create mode 100644 modules/image/classification/efficientnetb0_small_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb0_small_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb0_small_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb0_small_imagenet/processor.py create mode 100644 
modules/image/classification/efficientnetb0_small_imagenet/test.py diff --git a/modules/image/classification/efficientnetb0_small_imagenet/README.md b/modules/image/classification/efficientnetb0_small_imagenet/README.md index be464cc8c..1ce4ce50a 100644 --- a/modules/image/classification/efficientnetb0_small_imagenet/README.md +++ b/modules/image/classification/efficientnetb0_small_imagenet/README.md @@ -131,6 +131,10 @@ 初始发布 +* 1.1.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb0_small_imagenet==1.0.0 + $ hub install efficientnetb0_small_imagenet==1.1.0 ``` diff --git a/modules/image/classification/efficientnetb0_small_imagenet/README_en.md b/modules/image/classification/efficientnetb0_small_imagenet/README_en.md index 6aba803d3..42a0be15d 100644 --- a/modules/image/classification/efficientnetb0_small_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb0_small_imagenet/README_en.md @@ -130,6 +130,10 @@ First release +* 1.1.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb0_small_imagenet==1.0.0 + $ hub install efficientnetb0_small_imagenet==1.1.0 ``` diff --git a/modules/image/classification/efficientnetb0_small_imagenet/__init__.py b/modules/image/classification/efficientnetb0_small_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb0_small_imagenet/data_feed.py b/modules/image/classification/efficientnetb0_small_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
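Note on the data_feed.py hunk below: it resizes the short side to 256, center-crops to 224x224, converts to RGB CHW float32 in [0, 1], and normalizes with the ImageNet mean/std; `reader` yields one OrderedDict per input with the preprocessed array under the 'image' key. A minimal sketch of the contract a consumer can rely on (the image path is a placeholder, and the plain `data_feed` import stands in for the relative import used inside the module):

```python
# Hedged sketch of the contract provided by data_feed.reader; not part of the patch.
import numpy as np

from data_feed import reader  # inside the module this is `from .data_feed import reader`

for sample in reader(paths=['/PATH/TO/IMAGE.jpg']):
    chw = sample['image']                 # CHW float array, normalized by ImageNet mean/std
    assert chw.shape == (3, 224, 224)
    assert sample['org_im_width'] > 0 and sample['org_im_height'] > 0
    batch = np.expand_dims(chw, axis=0)   # shape (1, 3, 224, 224), ready for the predictor
```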
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb0_small_imagenet/label_list.txt b/modules/image/classification/efficientnetb0_small_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator 
+goose +black swan, Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler 
+German shepherd, German shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan 
troglodytes +gibbon, Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot 
+coil, spiral, volute, whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat 
paddle +paddlewheel, paddle wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum 
cleaner +vase +vault +velvet +vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb0_small_imagenet/module.py b/modules/image/classification/efficientnetb0_small_imagenet/module.py index 57716199d..db8357156 100644 --- a/modules/image/classification/efficientnetb0_small_imagenet/module.py +++ b/modules/image/classification/efficientnetb0_small_imagenet/module.py @@ -11,781 +11,187 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
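The hunk below strips out the dygraph EfficientNet-B0 implementation and replaces it with a thin wrapper around paddle.inference. For orientation, the public API that survives the rewrite can be exercised roughly as follows; this is only a sketch pieced together from the classification() signature and the test.py added later in this patch, and the image path and example scores are placeholders rather than part of the change itself.

import cv2
import paddlehub as hub

# Load the rewritten module by name, exactly as the accompanying test.py does.
module = hub.Module(name="efficientnetb0_small_imagenet")

# Either hand over decoded BGR ndarrays ...
results = module.classification(images=[cv2.imread('dog.jpeg')], top_k=3)

# ... or let the module read the files itself; use_gpu needs CUDA_VISIBLE_DEVICES to be set.
results = module.classification(paths=['dog.jpeg'], batch_size=1, use_gpu=False)

print(results)  # one {label: probability} dict per input, e.g. [{'Pembroke': 0.56, ...}]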
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b0': (1.0, 1.0, 224, 0.2) - } - return params_dict[model_name] +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb0_small_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB0 is a image classfication model, this module is trained with imagenet datasets.", + version="1.1.0") +class EfficientNetB0SmallImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb0_small_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b0": [224, 112, 112, 56, 28, 14, 14, 7]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act 
= act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - 
"""Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - 
cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb0_small_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnet_b0_small_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B0(nn.Layer): - def __init__(self, - is_test: bool = True, - padding_type: str = 'DYNAMIC', - override_params: dict = None, - use_se: bool = False, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B0, self).__init__() - - model_name = 'efficientnet-b0' - self.name = "b0" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b0_small_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b0_small_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == '__main__': + b0 = EfficientNetB0SmallImageNet() + b0.context() + import cv2 + test_image = [cv2.imread('dog.jpeg')] + res = b0.classification(images=test_image) + print(res) + res = b0.classification(paths=['dog.jpeg']) + print(res) + res = b0.classification(images=test_image) + print(res) + res = b0.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb0_small_imagenet/processor.py b/modules/image/classification/efficientnetb0_small_imagenet/processor.py new file mode 100644 index 000000000..c3ed4ea05 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/processor.py @@ -0,0 +1,66 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + orig_shape = x.shape + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb0_small_imagenet/test.py b/modules/image/classification/efficientnetb0_small_imagenet/test.py new file mode 100644 index 000000000..b2bd87728 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb0_small_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 1322f8dbbd701ee2456672c8f64adf9738e72fb5 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:52:53 +0800 Subject: [PATCH 102/117] update efficientnetb1_imagenet (#2049) * update efficientnetb1_imagenet * remove unused print --- .../efficientnetb1_imagenet/README.md | 7 +- .../efficientnetb1_imagenet/README_en.md | 7 +- .../efficientnetb1_imagenet/__init__.py | 0 .../efficientnetb1_imagenet/data_feed.py | 93 ++ .../efficientnetb1_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb1_imagenet/module.py | 925 +++------------ .../efficientnetb1_imagenet/processor.py | 65 ++ .../efficientnetb1_imagenet/test.py | 63 ++ 8 files changed, 1401 insertions(+), 759 deletions(-) create mode 100644 modules/image/classification/efficientnetb1_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb1_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb1_imagenet/label_list.txt create mode 100644 
modules/image/classification/efficientnetb1_imagenet/processor.py create mode 100644 modules/image/classification/efficientnetb1_imagenet/test.py diff --git a/modules/image/classification/efficientnetb1_imagenet/README.md b/modules/image/classification/efficientnetb1_imagenet/README.md index fe5981ece..76bb7cac1 100644 --- a/modules/image/classification/efficientnetb1_imagenet/README.md +++ b/modules/image/classification/efficientnetb1_imagenet/README.md @@ -131,6 +131,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb1_imagenet==1.1.0 + $ hub install efficientnetb1_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb1_imagenet/README_en.md b/modules/image/classification/efficientnetb1_imagenet/README_en.md index e578354f2..686fd9a9f 100644 --- a/modules/image/classification/efficientnetb1_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb1_imagenet/README_en.md @@ -129,6 +129,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb1_imagenet==1.1.0 + $ hub install efficientnetb1_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb1_imagenet/__init__.py b/modules/image/classification/efficientnetb1_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb1_imagenet/data_feed.py b/modules/image/classification/efficientnetb1_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
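Before the preprocessing code that follows, one note on how the @serving entry point added for efficientnetb0_small_imagenet earlier in this series is normally reached: it expects base64-encoded images, which base64_to_cv2 in processor.py turns back into BGR ndarrays. A client therefore looks roughly like the sketch below; the hub serving launch command, the default port 8866, the /predict/<module name> route and the 'results' response key all follow PaddleHub Serving's usual conventions and are assumptions here, not something this patch defines.

# Server side (shell), assuming the standard serving launcher:
#   hub serving start -m efficientnetb0_small_imagenet
import base64
import json

import cv2
import requests


def cv2_to_base64(image):
    # Inverse of processor.base64_to_cv2: JPEG-encode the BGR ndarray, then base64 it.
    ok, buf = cv2.imencode('.jpg', image)
    return base64.b64encode(buf.tobytes()).decode('utf8')


payload = {'images': [cv2_to_base64(cv2.imread('dog.jpeg'))]}
headers = {'Content-type': 'application/json'}
url = 'http://127.0.0.1:8866/predict/efficientnetb0_small_imagenet'
response = requests.post(url=url, headers=headers, data=json.dumps(payload))
print(response.json().get('results'))  # whatever the module's serving_method returns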
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb1_imagenet/label_list.txt b/modules/image/classification/efficientnetb1_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb1_imagenet/module.py b/modules/image/classification/efficientnetb1_imagenet/module.py index 2a5bd31f4..10e1ca365 100644 --- a/modules/image/classification/efficientnetb1_imagenet/module.py +++ b/modules/image/classification/efficientnetb1_imagenet/module.py @@ -11,781 +11,192 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
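The diff that follows replaces the old dygraph network definition with a Paddle Inference predictor exposed through a plain `classification` API. As a minimal usage sketch only (it is not part of the patch, and it assumes the module is already installed via `hub install efficientnetb1_imagenet` and that `dog.jpg` is any local image, both hypothetical here), the new interface can be exercised the same way the bundled test.py does:

```python
# Illustrative sketch, not part of the patch: mirrors the usage in the bundled test.py.
import cv2
import paddlehub as hub

# Assumes the refactored module is installed locally via PaddleHub.
module = hub.Module(name="efficientnetb1_imagenet")

# Predict from file paths; each result is a dict mapping label -> probability.
print(module.classification(paths=['dog.jpg'], top_k=3))

# Decoded BGR ndarrays are accepted as well, with an optional GPU switch.
print(module.classification(images=[cv2.imread('dog.jpg')], use_gpu=False))
```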
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b1': (1.0, 1.1, 240, 0.2) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb1_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB1 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB1ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb1_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b1": [240, 120, 120, 60, 30, 15, 15, 8]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act 
= act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - 
"""Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - 
cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb1_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb1_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B1(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B1, self).__init__() - - model_name = 'efficientnet-b1' - self.name = "b1" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 320 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b1_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b1_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x +if __name__ == '__main__': + b1 = EfficientNetB1ImageNet() + b1.context() + import cv2 + test_image = [ + cv2.imread( + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg') + ] + res = b1.classification(images=test_image) + print(res) + res = b1.classification(paths=[ + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg' + ]) + print(res) + res = b1.classification(images=test_image) + print(res) + res = b1.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb1_imagenet/processor.py b/modules/image/classification/efficientnetb1_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb1_imagenet/test.py b/modules/image/classification/efficientnetb1_imagenet/test.py new file mode 100644 index 000000000..8906c96b1 --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb1_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 219f1f83ac624f2c5f2e1db125148d51c77cf3bb Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:54:31 +0800 Subject: [PATCH 103/117] update efficientnetb2_imagenet (#2050) * update efficientnetb2_imagenet * remove unused print --- .../efficientnetb2_imagenet/README.md | 7 +- .../efficientnetb2_imagenet/README_en.md | 7 +- .../efficientnetb2_imagenet/__init__.py | 0 .../efficientnetb2_imagenet/data_feed.py | 93 ++ .../efficientnetb2_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb2_imagenet/module.py | 924 +++------------ .../efficientnetb2_imagenet/processor.py | 65 ++ .../efficientnetb2_imagenet/test.py | 63 ++ 8 files changed, 1400 insertions(+), 759 deletions(-) create mode 100644 modules/image/classification/efficientnetb2_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb2_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb2_imagenet/label_list.txt create mode 100644 
modules/image/classification/efficientnetb2_imagenet/processor.py create mode 100644 modules/image/classification/efficientnetb2_imagenet/test.py diff --git a/modules/image/classification/efficientnetb2_imagenet/README.md b/modules/image/classification/efficientnetb2_imagenet/README.md index 3972b35e1..a8a226f7c 100644 --- a/modules/image/classification/efficientnetb2_imagenet/README.md +++ b/modules/image/classification/efficientnetb2_imagenet/README.md @@ -131,6 +131,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb2_imagenet==1.1.0 + $ hub install efficientnetb2_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb2_imagenet/README_en.md b/modules/image/classification/efficientnetb2_imagenet/README_en.md index 01f5180bd..ef2c2fab5 100644 --- a/modules/image/classification/efficientnetb2_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb2_imagenet/README_en.md @@ -130,6 +130,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb2_imagenet==1.1.0 + $ hub install efficientnetb2_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb2_imagenet/__init__.py b/modules/image/classification/efficientnetb2_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb2_imagenet/data_feed.py b/modules/image/classification/efficientnetb2_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb2_imagenet/label_list.txt b/modules/image/classification/efficientnetb2_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb2_imagenet/module.py b/modules/image/classification/efficientnetb2_imagenet/module.py index fed9968ba..1fda63a09 100644 --- a/modules/image/classification/efficientnetb2_imagenet/module.py +++ b/modules/image/classification/efficientnetb2_imagenet/module.py @@ -11,781 +11,191 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b2': (1.1, 1.2, 260, 0.3) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb2_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB2 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB2ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb2_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b2": [260, 130, 130, 65, 33, 17, 17, 9]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = 
act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - 
"""Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - 
cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb2_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb2_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B2(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B2, self).__init__() - - model_name = 'efficientnet-b2' - self.name = "b2" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 352 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b2_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b2_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x +if __name__ == '__main__': + b2 = EfficientNetB2ImageNet() + b2.context() + import cv2 + test_image = [ + cv2.imread( + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg') + ] + res = b2.classification(images=test_image) + print(res) + res = b2.classification(paths=[ + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg' + ]) + print(res) + res = b2.classification(images=test_image) + print(res) + res = b2.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb2_imagenet/processor.py b/modules/image/classification/efficientnetb2_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb2_imagenet/test.py b/modules/image/classification/efficientnetb2_imagenet/test.py new file mode 100644 index 000000000..78bd777ba --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb2_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 1abbded3aab01c6498c6d7583e0e97219929919f Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:55:02 +0800 Subject: [PATCH 104/117] update efficientnetb3_imagenet (#2051) * update efficientnetb3_imagenet * remove unused print --- .../efficientnetb3_imagenet/README.md | 7 +- .../efficientnetb3_imagenet/README_en.md | 7 +- .../efficientnetb3_imagenet/__init__.py | 0 .../efficientnetb3_imagenet/data_feed.py | 93 ++ .../efficientnetb3_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb3_imagenet/module.py | 926 +++------------ .../efficientnetb3_imagenet/processor.py | 65 ++ .../efficientnetb3_imagenet/test.py | 63 ++ 8 files changed, 1402 insertions(+), 759 deletions(-) create mode 100644 modules/image/classification/efficientnetb3_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb3_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb3_imagenet/label_list.txt create mode 100644 
modules/image/classification/efficientnetb3_imagenet/processor.py create mode 100644 modules/image/classification/efficientnetb3_imagenet/test.py diff --git a/modules/image/classification/efficientnetb3_imagenet/README.md b/modules/image/classification/efficientnetb3_imagenet/README.md index 3bb627392..663c62606 100644 --- a/modules/image/classification/efficientnetb3_imagenet/README.md +++ b/modules/image/classification/efficientnetb3_imagenet/README.md @@ -131,6 +131,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb3_imagenet==1.1.0 + $ hub install efficientnetb3_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb3_imagenet/README_en.md b/modules/image/classification/efficientnetb3_imagenet/README_en.md index d305e4c75..8d10166be 100644 --- a/modules/image/classification/efficientnetb3_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb3_imagenet/README_en.md @@ -129,6 +129,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb3_imagenet==1.1.0 + $ hub install efficientnetb3_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb3_imagenet/__init__.py b/modules/image/classification/efficientnetb3_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb3_imagenet/data_feed.py b/modules/image/classification/efficientnetb3_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb3_imagenet/label_list.txt b/modules/image/classification/efficientnetb3_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb3_imagenet/module.py b/modules/image/classification/efficientnetb3_imagenet/module.py index 4b3903db6..b0db4f39c 100644 --- a/modules/image/classification/efficientnetb3_imagenet/module.py +++ b/modules/image/classification/efficientnetb3_imagenet/module.py @@ -11,781 +11,193 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
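+# EfficientNetB3 ImageNet classification module served through the paddle.inference
+# predictor API: the exported inference model (.pdmodel / .pdiparams) is loaded into a
+# CPU predictor, and into a GPU predictor as well when CUDA_VISIBLE_DEVICES is set.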
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b3': (1.2, 1.4, 300, 0.3) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb3_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB3 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB3ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb3_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + batch_image = np.array([data['image'] for data in batch_data]) + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b3": [300, 150, 150, 75, 38, 19, 19, 10]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, 
self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return 
self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - 
num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." 
+ str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb3_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb3_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B3(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B3, self).__init__() - - model_name = 'efficientnet-b3' - self.name = "b3" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 384 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
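+        Registered options: --use_gpu, --batch_size and --top_k.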
+ """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b3_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b3_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x +if __name__ == '__main__': + b3 = EfficientNetB3ImageNet() + b3.context() + import cv2 + test_image = [ + cv2.imread( + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg') + ] + res = b3.classification(images=test_image) + print(res) + res = b3.classification(paths=[ + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg' + ]) + print(res) + res = b3.classification(images=test_image) + print(res) + res = b3.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb3_imagenet/processor.py b/modules/image/classification/efficientnetb3_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb3_imagenet/test.py b/modules/image/classification/efficientnetb3_imagenet/test.py new file mode 100644 index 000000000..ca31c0817 --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb3_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From ee6f74b16dbf62c4394430b943016df6d2aa3a2d Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:55:34 +0800 Subject: [PATCH 105/117] update efficientnetb4_imagenet (#2052) --- .../efficientnetb4_imagenet/README.md | 8 +- .../efficientnetb4_imagenet/README_en.md | 7 +- .../efficientnetb4_imagenet/__init__.py | 0 .../efficientnetb4_imagenet/data_feed.py | 93 ++ .../efficientnetb4_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb4_imagenet/module.py | 925 +++------------ .../efficientnetb4_imagenet/processor.py | 65 ++ .../efficientnetb4_imagenet/test.py | 63 ++ 8 files changed, 1402 insertions(+), 759 deletions(-) create mode 100644 modules/image/classification/efficientnetb4_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb4_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb4_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb4_imagenet/processor.py create mode 100644 
modules/image/classification/efficientnetb4_imagenet/test.py diff --git a/modules/image/classification/efficientnetb4_imagenet/README.md b/modules/image/classification/efficientnetb4_imagenet/README.md index 1a7d0e9ba..2e4fda47f 100644 --- a/modules/image/classification/efficientnetb4_imagenet/README.md +++ b/modules/image/classification/efficientnetb4_imagenet/README.md @@ -132,6 +132,12 @@ * 1.1.0 提升预测性能以及易用性 + + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb4_imagenet==1.1.0 + $ hub install efficientnetb4_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb4_imagenet/README_en.md b/modules/image/classification/efficientnetb4_imagenet/README_en.md index e04148127..86b5142be 100644 --- a/modules/image/classification/efficientnetb4_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb4_imagenet/README_en.md @@ -131,6 +131,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb4_imagenet==1.1.0 + $ hub install efficientnetb4_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb4_imagenet/__init__.py b/modules/image/classification/efficientnetb4_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb4_imagenet/data_feed.py b/modules/image/classification/efficientnetb4_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
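+# Preprocessing helpers for efficientnetb4_imagenet: images are resized so the short side
+# is 256, center-cropped to 224 x 224, converted to CHW float32, scaled to [0, 1] and
+# normalized with the ImageNet mean/std before being fed to the predictor.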
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
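+        # ndarrays are expected in BGR channel order (e.g. from cv2.imread);
+        # the [:, :, ::-1] slice flips each one to RGB before building the PIL image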
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb4_imagenet/label_list.txt b/modules/image/classification/efficientnetb4_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb4_imagenet/module.py b/modules/image/classification/efficientnetb4_imagenet/module.py index 58d1f91eb..c9c40b5c5 100644 --- a/modules/image/classification/efficientnetb4_imagenet/module.py +++ b/modules/image/classification/efficientnetb4_imagenet/module.py @@ -11,781 +11,192 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
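[Editor's note] The hunk that follows replaces the in-repo dygraph EfficientNet-B4 network definition with a thin wrapper around the paddle.inference predictor plus the new data_feed/processor helpers. A minimal usage sketch, mirroring the test.py added later in this same patch (the module name and the classification() signature come from the diff; the image path is a placeholder):

```python
import cv2
import paddlehub as hub

# PaddleHub resolves the name to this refactored module directory.
module = hub.Module(name="efficientnetb4_imagenet")

# Pass file paths ...
print(module.classification(paths=["tests/test.jpg"], top_k=3))

# ... or BGR ndarrays read with OpenCV; use_gpu requires CUDA_VISIBLE_DEVICES to be set.
img = cv2.imread("tests/test.jpg")
print(module.classification(images=[img], batch_size=1, use_gpu=False, top_k=3))
```

Each result is a dict mapping the top-k ImageNet labels from label_list.txt to their softmax probabilities.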
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b4': (1.4, 1.8, 380, 0.4) - } - return params_dict[model_name] - - -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb4_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB4 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB4ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb4_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b4": [380, 190, 190, 95, 48, 24, 24, 12]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - self.act 
= act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class DepthwiseConvNorm(nn.Layer): - 
"""Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - name=name, - model_name=model_name, - 
cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb4_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb4_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B4(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B4, self).__init__() - - model_name = 'efficientnet-b4' - self.name = "b4" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 448 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. 
+ """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b4_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b4_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x +if __name__ == '__main__': + b4 = EfficientNetB4ImageNet() + b4.context() + import cv2 + test_image = [ + cv2.imread( + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg') + ] + res = b4.classification(images=test_image) + print(res) + res = b4.classification(paths=[ + '/mnt/zhangxuefei/program-paddle/PaddleHub/hub_module/tests/image_dataset/classification/animals/dog.jpeg' + ]) + print(res) + res = b4.classification(images=test_image) + print(res) + res = b4.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb4_imagenet/processor.py b/modules/image/classification/efficientnetb4_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb4_imagenet/test.py b/modules/image/classification/efficientnetb4_imagenet/test.py new file mode 100644 index 000000000..8bf583c87 --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb4_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 98145fb691727db4ee2ae1f8cd6fc63465a66432 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:56:01 +0800 Subject: [PATCH 106/117] update efficientnetb5_imagenet (#2053) --- .../efficientnetb5_imagenet/README.md | 7 +- .../efficientnetb5_imagenet/README_en.md | 7 +- .../efficientnetb5_imagenet/__init__.py | 0 .../efficientnetb5_imagenet/data_feed.py | 93 ++ .../efficientnetb5_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb5_imagenet/module.py | 924 +++------------ .../efficientnetb5_imagenet/processor.py | 65 ++ .../efficientnetb5_imagenet/test.py | 63 ++ 8 files changed, 1398 insertions(+), 761 deletions(-) create mode 100644 modules/image/classification/efficientnetb5_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb5_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb5_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb5_imagenet/processor.py create mode 100644 
modules/image/classification/efficientnetb5_imagenet/test.py diff --git a/modules/image/classification/efficientnetb5_imagenet/README.md b/modules/image/classification/efficientnetb5_imagenet/README.md index 3c8a4bc37..f1cee7034 100644 --- a/modules/image/classification/efficientnetb5_imagenet/README.md +++ b/modules/image/classification/efficientnetb5_imagenet/README.md @@ -132,6 +132,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb5_imagenet==1.1.0 + $ hub install efficientnetb5_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb5_imagenet/README_en.md b/modules/image/classification/efficientnetb5_imagenet/README_en.md index 2562ba133..7e56e38cd 100644 --- a/modules/image/classification/efficientnetb5_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb5_imagenet/README_en.md @@ -131,6 +131,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb5_imagenet==1.1.0 + $ hub install efficientnetb5_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb5_imagenet/__init__.py b/modules/image/classification/efficientnetb5_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb5_imagenet/data_feed.py b/modules/image/classification/efficientnetb5_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
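[Editor's note] The body of data_feed.py that follows implements the usual ImageNet evaluation preprocessing: resize the short side to 256, center-crop to 224x224, scale pixels to [0, 1], then normalize with the ImageNet mean/std; reader() yields one OrderedDict per image whose 'image' entry is the CHW float32 array fed to the predictor. A short sketch of consuming it on its own, assuming it is imported from outside the package (the image path is a placeholder):

```python
import numpy as np

from data_feed import reader  # inside the module package this is `from .data_feed import reader`

# Stack the preprocessed samples into a batch of shape [N, 3, 224, 224].
samples = list(reader(paths=["tests/test.jpg"]))
batch = np.array([sample["image"] for sample in samples])
print(batch.shape)
print(samples[0]["org_im_path"], samples[0]["org_im_width"], samples[0]["org_im_height"])
```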
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb5_imagenet/label_list.txt b/modules/image/classification/efficientnetb5_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb5_imagenet/module.py b/modules/image/classification/efficientnetb5_imagenet/module.py index 723200523..c96ab0c2b 100644 --- a/modules/image/classification/efficientnetb5_imagenet/module.py +++ b/modules/image/classification/efficientnetb5_imagenet/module.py @@ -11,781 +11,187 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b5': (1.6, 2.2, 456, 0.4) - } - return params_dict[model_name] +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb5_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB5 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB5ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb5_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b5": [456, 228, 228, 114, 57, 29, 29, 15]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - 
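# A minimal client sketch for the serving_method defined in this hunk, assuming the
# module is deployed with `hub serving start -m efficientnetb5_imagenet` and listening
# on the default port 8866; the URL, port and the 'dog.jpeg' file name are placeholder
# assumptions, not values taken from this patch.
import base64
import json

import cv2
import requests


def cv2_to_base64(image):
    # Encode a BGR ndarray as a base64 JPEG string, mirroring processor.base64_to_cv2.
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')


org_im = cv2.imread('dog.jpeg')
payload = {'images': [cv2_to_base64(org_im)]}
headers = {'Content-type': 'application/json'}
url = 'http://127.0.0.1:8866/predict/efficientnetb5_imagenet'
r = requests.post(url=url, headers=headers, data=json.dumps(payload))
print(r.json()['results'])
# The roughly equivalent command-line call handled by the run_cmd entry point would be
# `hub run efficientnetb5_imagenet --input_path dog.jpeg`.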
self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class 
DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - 
name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb5_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb5_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B5(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B5, self).__init__() - - model_name = 'efficientnet-b5' - self.name = "b5" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 512 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b5_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b5_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == '__main__': + b5 = EfficientNetB5ImageNet() + b5.context() + import cv2 + test_image = [cv2.imread('dog.jpeg')] + res = b5.classification(images=test_image) + print(res) + res = b5.classification(paths=['dog.jpeg']) + print(res) + res = b5.classification(images=test_image) + print(res) + res = b5.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb5_imagenet/processor.py b/modules/image/classification/efficientnetb5_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb5_imagenet/test.py b/modules/image/classification/efficientnetb5_imagenet/test.py new file mode 100644 index 000000000..6b331b198 --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb5_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From c4aed7442b7af2741ba0f797abf8c8b3a532cf5a Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:56:30 +0800 Subject: [PATCH 107/117] update efficientnetb6_imagenet (#2054) --- .../efficientnetb6_imagenet/README.md | 7 +- .../efficientnetb6_imagenet/README_en.md | 7 +- .../efficientnetb6_imagenet/__init__.py | 0 .../efficientnetb6_imagenet/data_feed.py | 93 ++ .../efficientnetb6_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb6_imagenet/module.py | 924 +++------------ .../efficientnetb6_imagenet/processor.py | 66 ++ .../efficientnetb6_imagenet/test.py | 63 ++ 8 files changed, 1399 insertions(+), 761 deletions(-) create mode 100644 modules/image/classification/efficientnetb6_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb6_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb6_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb6_imagenet/processor.py create mode 100644 
modules/image/classification/efficientnetb6_imagenet/test.py diff --git a/modules/image/classification/efficientnetb6_imagenet/README.md b/modules/image/classification/efficientnetb6_imagenet/README.md index 746ff1a71..7a628eb28 100644 --- a/modules/image/classification/efficientnetb6_imagenet/README.md +++ b/modules/image/classification/efficientnetb6_imagenet/README.md @@ -131,6 +131,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb6_imagenet==1.1.0 + $ hub install efficientnetb6_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb6_imagenet/README_en.md b/modules/image/classification/efficientnetb6_imagenet/README_en.md index cc10c7262..677b9ede9 100644 --- a/modules/image/classification/efficientnetb6_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb6_imagenet/README_en.md @@ -130,6 +130,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb6_imagenet==1.1.0 + $ hub install efficientnetb6_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb6_imagenet/__init__.py b/modules/image/classification/efficientnetb6_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb6_imagenet/data_feed.py b/modules/image/classification/efficientnetb6_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
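# A minimal sanity sketch for the preprocessing helpers in this data_feed.py, assuming
# the file is importable from the working directory and Pillow is installed: the shorter
# side is resized to 256, a 224x224 center crop is taken, and the result is a CHW
# float32 array normalized with the ImageNet mean/std.
from PIL import Image

from data_feed import process_image  # helper defined just above

img = Image.new('RGB', (640, 480), color=(128, 128, 128))  # stand-in test image
out = process_image(img)
print(out.shape)  # expected: (3, 224, 224)
print(out.dtype)  # float32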
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb6_imagenet/label_list.txt b/modules/image/classification/efficientnetb6_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb6_imagenet/module.py b/modules/image/classification/efficientnetb6_imagenet/module.py index d2323354f..0af7068c9 100644 --- a/modules/image/classification/efficientnetb6_imagenet/module.py +++ b/modules/image/classification/efficientnetb6_imagenet/module.py @@ -11,781 +11,187 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
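The hunk below replaces the 1.1.0 dygraph EfficientNet-B6 network definition with a thin wrapper around an exported Paddle inference model, exposing a `classification` API, a `@serving` endpoint and a `hub run` command line. As a quick orientation, here is a minimal usage sketch of the new interface (it mirrors the test added later in this patch; an installed PaddleHub environment and the image path `dog.jpeg` are assumptions, not part of the diff):

```python
# Minimal sketch: load the inference-based module and classify an image.
# Assumes PaddleHub is installed and 'dog.jpeg' exists locally (illustrative only).
import cv2
import paddlehub as hub

module = hub.Module(name="efficientnetb6_imagenet")

# Predict from file paths; top_k controls how many labels come back per image.
print(module.classification(paths=['dog.jpeg'], top_k=3))

# Predict from in-memory BGR ndarrays, the format documented in the docstring below.
print(module.classification(images=[cv2.imread('dog.jpeg')], batch_size=1, use_gpu=False))
```

Each result is a dict mapping the top-k ImageNet labels to their probabilities, as produced by the `postprocess` helper added in processor.py.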
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b6': (1.8, 2.6, 528, 0.5) - } - return params_dict[model_name] +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb6_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB6 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB6ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb6_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b6": [528, 264, 264, 132, 66, 33, 33, 17]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - 
self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class 
DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - 
name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb6_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb6_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B6(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B6, self).__init__() - - model_name = 'efficientnet-b6' - self.name = "b6" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 576 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b6_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b6_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == '__main__': + b6 = EfficientNetB6ImageNet() + b6.context() + import cv2 + test_image = [cv2.imread('dog.jpeg')] + res = b6.classification(images=test_image) + print(res) + res = b6.classification(paths=['dog.jpeg']) + print(res) + res = b6.classification(images=test_image) + print(res) + res = b6.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb6_imagenet/processor.py b/modules/image/classification/efficientnetb6_imagenet/processor.py new file mode 100644 index 000000000..b0d8a83c6 --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/processor.py @@ -0,0 +1,66 @@ +# -*- coding:utf-8 -*- +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb6_imagenet/test.py b/modules/image/classification/efficientnetb6_imagenet/test.py new file mode 100644 index 000000000..321ca6fd0 --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb6_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Pembroke' in data) + self.assertTrue(data['Pembroke'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 4c8b919b1f2cb4221d784e5cda751ab52c135b05 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Fri, 14 Oct 2022 14:57:17 +0800 Subject: [PATCH 108/117] update efficientnetb7_imagenet (#2055) --- .../efficientnetb7_imagenet/README.md | 7 +- .../efficientnetb7_imagenet/README_en.md | 7 +- .../efficientnetb7_imagenet/__init__.py | 0 .../efficientnetb7_imagenet/data_feed.py | 93 ++ .../efficientnetb7_imagenet/label_list.txt | 1000 +++++++++++++++++ .../efficientnetb7_imagenet/module.py | 924 +++------------ .../efficientnetb7_imagenet/processor.py | 65 ++ .../efficientnetb7_imagenet/test.py | 63 ++ 8 files changed, 1398 insertions(+), 761 deletions(-) create mode 100644 modules/image/classification/efficientnetb7_imagenet/__init__.py create mode 100644 modules/image/classification/efficientnetb7_imagenet/data_feed.py create mode 100644 modules/image/classification/efficientnetb7_imagenet/label_list.txt create mode 100644 modules/image/classification/efficientnetb7_imagenet/processor.py create mode 100644 
modules/image/classification/efficientnetb7_imagenet/test.py diff --git a/modules/image/classification/efficientnetb7_imagenet/README.md b/modules/image/classification/efficientnetb7_imagenet/README.md index bef07051b..6ecfbf64c 100644 --- a/modules/image/classification/efficientnetb7_imagenet/README.md +++ b/modules/image/classification/efficientnetb7_imagenet/README.md @@ -132,6 +132,11 @@ * 1.1.0 提升预测性能以及易用性 + +* 1.2.0 + + 移除 Fluid API + - ```shell - $ hub install efficientnetb7_imagenet==1.1.0 + $ hub install efficientnetb7_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb7_imagenet/README_en.md b/modules/image/classification/efficientnetb7_imagenet/README_en.md index d61af6696..66f481bca 100644 --- a/modules/image/classification/efficientnetb7_imagenet/README_en.md +++ b/modules/image/classification/efficientnetb7_imagenet/README_en.md @@ -131,6 +131,11 @@ * 1.1.0 Improve the prediction performance and users' experience + +* 1.2.0 + + Remove Fluid API + - ```shell - $ hub install efficientnetb7_imagenet==1.1.0 + $ hub install efficientnetb7_imagenet==1.2.0 ``` diff --git a/modules/image/classification/efficientnetb7_imagenet/__init__.py b/modules/image/classification/efficientnetb7_imagenet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/modules/image/classification/efficientnetb7_imagenet/data_feed.py b/modules/image/classification/efficientnetb7_imagenet/data_feed.py new file mode 100644 index 000000000..60a33f4df --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/data_feed.py @@ -0,0 +1,93 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
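From this point the patch applies the same 1.2.0 restructuring to efficientnetb7_imagenet: the README changelogs record the removal of the Fluid API, and the new data_feed.py below implements the standard ImageNet preprocessing — resize the short side to 256, take a 224x224 center crop, then normalize each channel with the ImageNet mean and std. A small self-contained sketch of that arithmetic, using a hypothetical 640x480 input (the blank image is a stand-in for a real photo):

```python
# Sketch of the preprocessing performed by data_feed.py below (assumption: 640x480 RGB input).
import numpy as np
from PIL import Image

img = Image.new('RGB', (640, 480))  # stand-in for a real photo

# resize_short: scale so the shorter side becomes 256 -> 341x256 for this input.
percent = 256 / min(img.size)
img = img.resize((round(img.size[0] * percent), round(img.size[1] * percent)), Image.LANCZOS)

# Center-crop to 224x224, convert to CHW float32 in [0, 1], then normalize.
w_start, h_start = (img.size[0] - 224) // 2, (img.size[1] - 224) // 2
img = img.crop((w_start, h_start, w_start + 224, h_start + 224))
arr = np.asarray(img, dtype='float32').transpose((2, 0, 1)) / 255
arr = (arr - np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))) / np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
print(arr.shape)  # (3, 224, 224): the tensor that gets batched and fed to the predictor
```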
+import os +import time +from collections import OrderedDict + +import numpy as np +from PIL import Image + +__all__ = ['reader'] + +DATA_DIM = 224 +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.LANCZOS) + return img + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = np.random.randint(0, width - size + 1) + h_start = np.random.randint(0, height - size + 1) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def process_image(img): + img = resize_short(img, target_size=256) + img = crop_image(img, target_size=DATA_DIM, center=True) + if img.mode != 'RGB': + img = img.convert('RGB') + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + return img + + +def reader(images=None, paths=None): + """ + Preprocess to yield image. + + Args: + images (list[numpy.ndarray]): images data, shape of each is [H, W, C]. + paths (list[str]): paths to images. + + Yield: + each (collections.OrderedDict): info of original image, preprocessed image. + """ + component = list() + if paths: + for im_path in paths: + each = OrderedDict() + assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path) + each['org_im_path'] = im_path + each['org_im'] = Image.open(im_path) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + if images is not None: + assert type(images), "images is a list." 
+ for im in images: + each = OrderedDict() + each['org_im'] = Image.fromarray(im[:, :, ::-1]) + each['org_im_path'] = 'ndarray_time={}'.format(round(time.time(), 6) * 1e6) + each['org_im_width'], each['org_im_height'] = each['org_im'].size + component.append(each) + + for element in component: + element['image'] = process_image(element['org_im']) + yield element diff --git a/modules/image/classification/efficientnetb7_imagenet/label_list.txt b/modules/image/classification/efficientnetb7_imagenet/label_list.txt new file mode 100644 index 000000000..a509c0074 --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench, Tinca tinca +goldfish, Carassius auratus +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias +tiger shark, Galeocerdo cuvieri +hammerhead, hammerhead shark +electric ray, crampfish, numbfish, torpedo +stingray +cock +hen +ostrich, Struthio camelus +brambling, Fringilla montifringilla +goldfinch, Carduelis carduelis +house finch, linnet, Carpodacus mexicanus +junco, snowbird +indigo bunting, indigo finch, indigo bird, Passerina cyanea +robin, American robin, Turdus migratorius +bulbul +jay +magpie +chickadee +water ouzel, dipper +kite +bald eagle, American eagle, Haliaeetus leucocephalus +vulture +great grey owl, great gray owl, Strix nebulosa +European fire salamander, Salamandra salamandra +common newt, Triturus vulgaris +eft +spotted salamander, Ambystoma maculatum +axolotl, mud puppy, Ambystoma mexicanum +bullfrog, Rana catesbeiana +tree frog, tree-frog +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +loggerhead, loggerhead turtle, Caretta caretta +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +mud turtle +terrapin +box turtle, box tortoise +banded gecko +common iguana, iguana, Iguana iguana +American chameleon, anole, Anolis carolinensis +whiptail, whiptail lizard +agama +frilled lizard, Chlamydosaurus kingi +alligator lizard +Gila monster, Heloderma suspectum +green lizard, Lacerta viridis +African chameleon, Chamaeleo chamaeleon +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +African crocodile, Nile crocodile, Crocodylus niloticus +American alligator, Alligator mississipiensis +triceratops +thunder snake, worm snake, Carphophis amoenus +ringneck snake, ring-necked snake, ring snake +hognose snake, puff adder, sand viper +green snake, grass snake +king snake, kingsnake +garter snake, grass snake +water snake +vine snake +night snake, Hypsiglena torquata +boa constrictor, Constrictor constrictor +rock python, rock snake, Python sebae +Indian cobra, Naja naja +green mamba +sea snake +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +diamondback, diamondback rattlesnake, Crotalus adamanteus +sidewinder, horned rattlesnake, Crotalus cerastes +trilobite +harvestman, daddy longlegs, Phalangium opilio +scorpion +black and gold garden spider, Argiope aurantia +barn spider, Araneus cavaticus +garden spider, Aranea diademata +black widow, Latrodectus mactans +tarantula +wolf spider, hunting spider +tick +centipede +black grouse +ptarmigan +ruffed grouse, partridge, Bonasa umbellus +prairie chicken, prairie grouse, prairie fowl +peacock +quail +partridge +African grey, African gray, Psittacus erithacus +macaw +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser, Mergus serrator +goose +black swan, 
Cygnus atratus +tusker +echidna, spiny anteater, anteater +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +wallaby, brush kangaroo +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +wombat +jellyfish +sea anemone, anemone +brain coral +flatworm, platyhelminth +nematode, nematode worm, roundworm +conch +snail +slug +sea slug, nudibranch +chiton, coat-of-mail shell, sea cradle, polyplacophore +chambered nautilus, pearly nautilus, nautilus +Dungeness crab, Cancer magister +rock crab, Cancer irroratus +fiddler crab +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +American lobster, Northern lobster, Maine lobster, Homarus americanus +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +crayfish, crawfish, crawdad, crawdaddy +hermit crab +isopod +white stork, Ciconia ciconia +black stork, Ciconia nigra +spoonbill +flamingo +little blue heron, Egretta caerulea +American egret, great white heron, Egretta albus +bittern +crane +limpkin, Aramus pictus +European gallinule, Porphyrio porphyrio +American coot, marsh hen, mud hen, water hen, Fulica americana +bustard +ruddy turnstone, Arenaria interpres +red-backed sandpiper, dunlin, Erolia alpina +redshank, Tringa totanus +dowitcher +oystercatcher, oyster catcher +pelican +king penguin, Aptenodytes patagonica +albatross, mollymawk +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +killer whale, killer, orca, grampus, sea wolf, Orcinus orca +dugong, Dugong dugon +sea lion +Chihuahua +Japanese spaniel +Maltese dog, Maltese terrier, Maltese +Pekinese, Pekingese, Peke +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound, Afghan +basset, basset hound +beagle +bloodhound, sleuthhound +bluetick +black-and-tan coonhound +Walker hound, Walker foxhound +English foxhound +redbone +borzoi, Russian wolfhound +Irish wolfhound +Italian greyhound +whippet +Ibizan hound, Ibizan Podenco +Norwegian elkhound, elkhound +otterhound, otter hound +Saluki, gazelle hound +Scottish deerhound, deerhound +Weimaraner +Staffordshire bullterrier, Staffordshire bull terrier +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier, Sealyham +Airedale, Airedale terrier +cairn, cairn terrier +Australian terrier +Dandie Dinmont, Dandie Dinmont terrier +Boston bull, Boston terrier +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier, Scottish terrier, Scottie +Tibetan terrier, chrysanthemum dog +silky terrier, Sydney silky +soft-coated wheaten terrier +West Highland white terrier +Lhasa, Lhasa apso +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla, Hungarian pointer +English setter +Irish setter, red setter +Gordon setter +Brittany spaniel +clumber, clumber spaniel +English springer, English springer spaniel +Welsh springer spaniel +cocker spaniel, English cocker spaniel, cocker +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog, bobtail +Shetland sheepdog, Shetland sheep dog, Shetland +collie +Border collie +Bouvier des Flandres, Bouviers des Flandres +Rottweiler +German shepherd, German 
shepherd dog, German police dog, alsatian +Doberman, Doberman pinscher +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard, St Bernard +Eskimo dog, husky +malamute, malemute, Alaskan malamute +Siberian husky +dalmatian, coach dog, carriage dog +affenpinscher, monkey pinscher, monkey dog +basenji +pug, pug-dog +Leonberg +Newfoundland, Newfoundland dog +Great Pyrenees +Samoyed, Samoyede +Pomeranian +chow, chow chow +keeshond +Brabancon griffon +Pembroke, Pembroke Welsh corgi +Cardigan, Cardigan Welsh corgi +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf, grey wolf, gray wolf, Canis lupus +white wolf, Arctic wolf, Canis lupus tundrarum +red wolf, maned wolf, Canis rufus, Canis niger +coyote, prairie wolf, brush wolf, Canis latrans +dingo, warrigal, warragal, Canis dingo +dhole, Cuon alpinus +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +hyena, hyaena +red fox, Vulpes vulpes +kit fox, Vulpes macrotis +Arctic fox, white fox, Alopex lagopus +grey fox, gray fox, Urocyon cinereoargenteus +tabby, tabby cat +tiger cat +Persian cat +Siamese cat, Siamese +Egyptian cat +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +lynx, catamount +leopard, Panthera pardus +snow leopard, ounce, Panthera uncia +jaguar, panther, Panthera onca, Felis onca +lion, king of beasts, Panthera leo +tiger, Panthera tigris +cheetah, chetah, Acinonyx jubatus +brown bear, bruin, Ursus arctos +American black bear, black bear, Ursus americanus, Euarctos americanus +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +sloth bear, Melursus ursinus, Ursus ursinus +mongoose +meerkat, mierkat +tiger beetle +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +ground beetle, carabid beetle +long-horned beetle, longicorn, longicorn beetle +leaf beetle, chrysomelid +dung beetle +rhinoceros beetle +weevil +fly +bee +ant, emmet, pismire +grasshopper, hopper +cricket +walking stick, walkingstick, stick insect +cockroach, roach +mantis, mantid +cicada, cicala +leafhopper +lacewing, lacewing fly +dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +damselfly +admiral +ringlet, ringlet butterfly +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +cabbage butterfly +sulphur butterfly, sulfur butterfly +lycaenid, lycaenid butterfly +starfish, sea star +sea urchin +sea cucumber, holothurian +wood rabbit, cottontail, cottontail rabbit +hare +Angora, Angora rabbit +hamster +porcupine, hedgehog +fox squirrel, eastern fox squirrel, Sciurus niger +marmot +beaver +guinea pig, Cavia cobaya +sorrel +zebra +hog, pig, grunter, squealer, Sus scrofa +wild boar, boar, Sus scrofa +warthog +hippopotamus, hippo, river horse, Hippopotamus amphibius +ox +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +bison +ram, tup +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +ibex, Capra ibex +hartebeest +impala, Aepyceros melampus +gazelle +Arabian camel, dromedary, Camelus dromedarius +llama +weasel +mink +polecat, fitch, foulmart, foumart, Mustela putorius +black-footed ferret, ferret, Mustela nigripes +otter +skunk, polecat, wood pussy +badger +armadillo +three-toed sloth, ai, Bradypus tridactylus +orangutan, orang, orangutang, Pongo pygmaeus +gorilla, Gorilla gorilla +chimpanzee, chimp, Pan troglodytes +gibbon, 
Hylobates lar +siamang, Hylobates syndactylus, Symphalangus syndactylus +guenon, guenon monkey +patas, hussar monkey, Erythrocebus patas +baboon +macaque +langur +colobus, colobus monkey +proboscis monkey, Nasalis larvatus +marmoset +capuchin, ringtail, Cebus capucinus +howler monkey, howler +titi, titi monkey +spider monkey, Ateles geoffroyi +squirrel monkey, Saimiri sciureus +Madagascar cat, ring-tailed lemur, Lemur catta +indri, indris, Indri indri, Indri brevicaudatus +Indian elephant, Elephas maximus +African elephant, Loxodonta africana +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +barracouta, snoek +eel +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +rock beauty, Holocanthus tricolor +anemone fish +sturgeon +gar, garfish, garpike, billfish, Lepisosteus osseus +lionfish +puffer, pufferfish, blowfish, globefish +abacus +abaya +academic gown, academic robe, judge's robe +accordion, piano accordion, squeeze box +acoustic guitar +aircraft carrier, carrier, flattop, attack aircraft carrier +airliner +airship, dirigible +altar +ambulance +amphibian, amphibious vehicle +analog clock +apiary, bee house +apron +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +assault rifle, assault gun +backpack, back pack, knapsack, packsack, rucksack, haversack +bakery, bakeshop, bakehouse +balance beam, beam +balloon +ballpoint, ballpoint pen, ballpen, Biro +Band Aid +banjo +bannister, banister, balustrade, balusters, handrail +barbell +barber chair +barbershop +barn +barometer +barrel, cask +barrow, garden cart, lawn cart, wheelbarrow +baseball +basketball +bassinet +bassoon +bathing cap, swimming cap +bath towel +bathtub, bathing tub, bath, tub +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +beacon, lighthouse, beacon light, pharos +beaker +bearskin, busby, shako +beer bottle +beer glass +bell cote, bell cot +bib +bicycle-built-for-two, tandem bicycle, tandem +bikini, two-piece +binder, ring-binder +binoculars, field glasses, opera glasses +birdhouse +boathouse +bobsled, bobsleigh, bob +bolo tie, bolo, bola tie, bola +bonnet, poke bonnet +bookcase +bookshop, bookstore, bookstall +bottlecap +bow +bow tie, bow-tie, bowtie +brass, memorial tablet, plaque +brassiere, bra, bandeau +breakwater, groin, groyne, mole, bulwark, seawall, jetty +breastplate, aegis, egis +broom +bucket, pail +buckle +bulletproof vest +bullet train, bullet +butcher shop, meat market +cab, hack, taxi, taxicab +caldron, cauldron +candle, taper, wax light +cannon +canoe +can opener, tin opener +cardigan +car mirror +carousel, carrousel, merry-go-round, roundabout, whirligig +carpenter's kit, tool kit +carton +car wheel +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +cassette +cassette player +castle +catamaran +CD player +cello, violoncello +cellular telephone, cellular phone, cellphone, cell, mobile phone +chain +chainlink fence +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +chain saw, chainsaw +chest +chiffonier, commode +chime, bell, gong +china cabinet, china closet +Christmas stocking +church, church building +cinema, movie theater, movie theatre, movie house, picture palace +cleaver, meat cleaver, chopper +cliff dwelling +cloak +clog, geta, patten, sabot +cocktail shaker +coffee mug +coffeepot +coil, spiral, volute, 
whorl, helix +combination lock +computer keyboard, keypad +confectionery, confectionary, candy store +container ship, containership, container vessel +convertible +corkscrew, bottle screw +cornet, horn, trumpet, trump +cowboy boot +cowboy hat, ten-gallon hat +cradle +crane +crash helmet +crate +crib, cot +Crock Pot +croquet ball +crutch +cuirass +dam, dike, dyke +desk +desktop computer +dial telephone, dial phone +diaper, nappy, napkin +digital clock +digital watch +dining table, board +dishrag, dishcloth +dishwasher, dish washer, dishwashing machine +disk brake, disc brake +dock, dockage, docking facility +dogsled, dog sled, dog sleigh +dome +doormat, welcome mat +drilling platform, offshore rig +drum, membranophone, tympan +drumstick +dumbbell +Dutch oven +electric fan, blower +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa, boa +file, file cabinet, filing cabinet +fireboat +fire engine, fire truck +fire screen, fireguard +flagpole, flagstaff +flute, transverse flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn, horn +frying pan, frypan, skillet +fur coat +garbage truck, dustcart +gasmask, respirator, gas helmet +gas pump, gasoline pump, petrol pump, island dispenser +goblet +go-kart +golf ball +golfcart, golf cart +gondola +gong, tam-tam +gown +grand piano, grand +greenhouse, nursery, glasshouse +grille, radiator grille +grocery store, grocery, food market, market +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower, blow dryer, blow drier, hair dryer, hair drier +hand-held computer, hand-held microcomputer +handkerchief, hankie, hanky, hankey +hard disc, hard disk, fixed disk +harmonica, mouth organ, harp, mouth harp +harp +harvester, reaper +hatchet +holster +home theater, home theatre +honeycomb +hook, claw +hoopskirt, crinoline +horizontal bar, high bar +horse cart, horse-cart +hourglass +iPod +iron, smoothing iron +jack-o'-lantern +jean, blue jean, denim +jeep, landrover +jersey, T-shirt, tee shirt +jigsaw puzzle +jinrikisha, ricksha, rickshaw +joystick +kimono +knee pad +knot +lab coat, laboratory coat +ladle +lampshade, lamp shade +laptop, laptop computer +lawn mower, mower +lens cap, lens cover +letter opener, paper knife, paperknife +library +lifeboat +lighter, light, igniter, ignitor +limousine, limo +liner, ocean liner +lipstick, lip rouge +Loafer +lotion +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +loupe, jeweler's loupe +lumbermill, sawmill +magnetic compass +mailbag, postbag +mailbox, letter box +maillot +maillot, tank suit +manhole cover +maraca +marimba, xylophone +mask +matchstick +maypole +maze, labyrinth +measuring cup +medicine chest, medicine cabinet +megalith, megalithic structure +microphone, mike +microwave, microwave oven +military uniform +milk can +minibus +miniskirt, mini +minivan +missile +mitten +mixing bowl +mobile home, manufactured home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter, scooter +mountain bike, all-terrain bike, off-roader +mountain tent +mouse, computer mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook, notebook computer +obelisk +oboe, hautboy, hautbois +ocarina, sweet potato +odometer, hodometer, mileometer, milometer +oil filter +organ, pipe organ +oscilloscope, scope, cathode-ray oscilloscope, CRO +overskirt +oxcart +oxygen mask +packet +paddle, boat paddle +paddlewheel, paddle 
wheel +padlock +paintbrush +pajama, pyjama, pj's, jammies +palace +panpipe, pandean pipe, syrinx +paper towel +parachute, chute +parallel bars, bars +park bench +parking meter +passenger car, coach, carriage +patio, terrace +pay-phone, pay-station +pedestal, plinth, footstall +pencil box, pencil case +pencil sharpener +perfume, essence +Petri dish +photocopier +pick, plectrum, plectron +pickelhaube +picket fence, paling +pickup, pickup truck +pier +piggy bank, penny bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate, pirate ship +pitcher, ewer +plane, carpenter's plane, woodworking plane +planetarium +plastic bag +plate rack +plow, plough +plunger, plumber's helper +Polaroid camera, Polaroid Land camera +pole +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +poncho +pool table, billiard table, snooker table +pop bottle, soda bottle +pot, flowerpot +potter's wheel +power drill +prayer rug, prayer mat +printer +prison, prison house +projectile, missile +projector +puck, hockey puck +punching bag, punch bag, punching ball, punchball +purse +quill, quill pen +quilt, comforter, comfort, puff +racer, race car, racing car +racket, racquet +radiator +radio, wireless +radio telescope, radio reflector +rain barrel +recreational vehicle, RV, R.V. +reel +reflex camera +refrigerator, icebox +remote control, remote +restaurant, eating house, eating place, eatery +revolver, six-gun, six-shooter +rifle +rocking chair, rocker +rotisserie +rubber eraser, rubber, pencil eraser +rugby ball +rule, ruler +running shoe +safe +safety pin +saltshaker, salt shaker +sandal +sarong +sax, saxophone +scabbard +scale, weighing machine +school bus +schooner +scoreboard +screen, CRT screen +screw +screwdriver +seat belt, seatbelt +sewing machine +shield, buckler +shoe shop, shoe-shop, shoe store +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule, slipstick +sliding door +slot, one-armed bandit +snorkel +snowmobile +snowplow, snowplough +soap dispenser +soccer ball +sock +solar dish, solar collector, solar furnace +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web, spider's web +spindle +sports car, sport car +spotlight, spot +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch, stop watch +stove +strainer +streetcar, tram, tramcar, trolley, trolley car +stretcher +studio couch, day bed +stupa, tope +submarine, pigboat, sub, U-boat +suit, suit of clothes +sundial +sunglass +sunglasses, dark glasses, shades +sunscreen, sunblock, sun blocker +suspension bridge +swab, swob, mop +sweatshirt +swimming trunks, bathing trunks +swing +switch, electric switch, electrical switch +syringe +table lamp +tank, army tank, armored combat vehicle, armoured combat vehicle +tape player +teapot +teddy, teddy bear +television, television system +tennis ball +thatch, thatched roof +theater curtain, theatre curtain +thimble +thresher, thrasher, threshing machine +throne +tile roof +toaster +tobacco shop, tobacconist shop, tobacconist +toilet seat +torch +totem pole +tow truck, tow car, wrecker +toyshop +tractor +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +tray +trench coat +tricycle, trike, velocipede +trimaran +tripod +triumphal arch +trolleybus, trolley coach, trackless trolley +trombone +tub, vat +turnstile +typewriter keyboard +umbrella +unicycle, monocycle +upright, upright piano +vacuum, vacuum cleaner +vase +vault +velvet 
+vending machine +vestment +viaduct +violin, fiddle +volleyball +waffle iron +wall clock +wallet, billfold, notecase, pocketbook +wardrobe, closet, press +warplane, military plane +washbasin, handbasin, washbowl, lavabo, wash-hand basin +washer, automatic washer, washing machine +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool, woolen, woollen +worm fence, snake fence, snake-rail fence, Virginia fence +wreck +yawl +yurt +web site, website, internet site, site +comic book +crossword puzzle, crossword +street sign +traffic light, traffic signal, stoplight +book jacket, dust cover, dust jacket, dust wrapper +menu +plate +guacamole +consomme +hot pot, hotpot +trifle +ice cream, icecream +ice lolly, lolly, lollipop, popsicle +French loaf +bagel, beigel +pretzel +cheeseburger +hotdog, hot dog, red hot +mashed potato +head cabbage +broccoli +cauliflower +zucchini, courgette +spaghetti squash +acorn squash +butternut squash +cucumber, cuke +artichoke, globe artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple, ananas +banana +jackfruit, jak, jack +custard apple +pomegranate +hay +carbonara +chocolate sauce, chocolate syrup +dough +meat loaf, meatloaf +pizza, pizza pie +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff, drop, drop-off +coral reef +geyser +lakeside, lakeshore +promontory, headland, head, foreland +sandbar, sand bar +seashore, coast, seacoast, sea-coast +valley, vale +volcano +ballplayer, baseball player +groom, bridegroom +scuba diver +rapeseed +daisy +yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +corn +acorn +hip, rose hip, rosehip +buckeye, horse chestnut, conker +coral fungus +agaric +gyromitra +stinkhorn, carrion fungus +earthstar +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +bolete +ear, spike, capitulum +toilet tissue, toilet paper, bathroom tissue diff --git a/modules/image/classification/efficientnetb7_imagenet/module.py b/modules/image/classification/efficientnetb7_imagenet/module.py index 9365b5359..5723430ae 100644 --- a/modules/image/classification/efficientnetb7_imagenet/module.py +++ b/modules/image/classification/efficientnetb7_imagenet/module.py @@ -11,781 +11,187 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
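+# This module loads a pre-exported EfficientNetB7 inference model via paddle.inference
+# (Config + create_predictor) and exposes an image classification API on top of it.
+# A typical call (using the same 'dog.jpeg' sample exercised in the __main__ block below):
+#     module = EfficientNetB7ImageNet()
+#     results = module.classification(paths=['dog.jpeg'], top_k=3)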
+from __future__ import absolute_import +from __future__ import division +import argparse +import ast import os -import math -import collections -import re -import copy - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn import Conv2d, BatchNorm, Linear, Dropout -from paddle.nn import AdaptiveAvgPool2d, MaxPool2d, AvgPool2d -from paddlehub.module.module import moduleinfo -from paddlehub.module.cv_module import ImageClassifierModule - -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', - 'batch_norm_epsilon', - 'dropout_rate', - 'num_classes', - 'width_coefficient', - 'depth_coefficient', - 'depth_divisor', - 'min_depth', - 'drop_connect_rate', -]) - -BlockArgs = collections.namedtuple( - 'BlockArgs', - ['kernel_size', 'num_repeat', 'input_filters', 'output_filters', 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) - -GlobalParams.__new__.__defaults__ = (None, ) * len(GlobalParams._fields) -BlockArgs.__new__.__defaults__ = (None, ) * len(BlockArgs._fields) - - -def efficientnet_params(model_name: str): - """ Map EfficientNet model name to parameter coefficients. """ - params_dict = { - # Coefficients: width,depth,resolution,dropout - 'efficientnet-b7': (2.0, 3.1, 600, 0.5) - } - return params_dict[model_name] +import numpy as np +from paddle.inference import Config +from paddle.inference import create_predictor -def efficientnet(width_coefficient: float = None, - depth_coefficient: float = None, - dropout_rate: float = 0.2, - drop_connect_rate: float = 0.2): - """ Get block arguments according to parameter and coefficients. """ - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', - 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', - 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', - 'r4_k5_s22_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - blocks_args = BlockDecoder.decode(blocks_args) - - global_params = GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None) - - return blocks_args, global_params - - -def get_model_params(model_name: str, override_params: dict): - """ Get the block args and global params for a given model """ - if model_name.startswith('efficientnet'): - w, d, _, p = efficientnet_params(model_name) - blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) - if override_params: - global_params = global_params._replace(**override_params) - return blocks_args, global_params - - -def round_filters(filters: int, global_params: dict): - """ Calculate and round number of filters based on depth multiplier. 
""" - multiplier = global_params.width_coefficient - if not multiplier: - return filters - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - if new_filters < 0.9 * filters: # prevent rounding by more than 10% - new_filters += divisor - return int(new_filters) +from .data_feed import reader +from .processor import base64_to_cv2 +from .processor import postprocess +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving -def round_repeats(repeats: int, global_params: dict): - """ Round number of filters based on depth multiplier. """ - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) +@moduleinfo(name="efficientnetb7_imagenet", + type="CV/image_classification", + author="paddlepaddle", + author_email="paddle-dev@baidu.com", + summary="EfficientNetB7 is a image classfication model, this module is trained with imagenet datasets.", + version="1.2.0") +class EfficientNetB7ImageNet: + def __init__(self): + self.default_pretrained_model_path = os.path.join(self.directory, "efficientnetb7_imagenet_infer_model", + "model") + label_file = os.path.join(self.directory, "label_list.txt") + with open(label_file, 'r', encoding='utf-8') as file: + self.label_list = file.read().split("\n")[:-1] + self._set_config() -class BlockDecoder(object): - """ - Block Decoder, straight from the official TensorFlow repository. - """ + def get_expected_image_width(self): + return 224 - @staticmethod - def _decode_block_string(block_string: str): - """ Gets a block through a string notation of arguments. 
""" - assert isinstance(block_string, str) + def get_expected_image_height(self): + return 224 - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def get_pretrained_images_mean(self): + im_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3) + return im_mean - # Check stride - cond_1 = ('s' in options and len(options['s']) == 1) - cond_2 = ((len(options['s']) == 2) and (options['s'][0] == options['s'][1])) - assert (cond_1 or cond_2) + def get_pretrained_images_std(self): + im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) + return im_std - return BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - stride=[int(options['s'][0])]) + def _set_config(self): + """ + predictor config setting + """ + model = self.default_pretrained_model_path + '.pdmodel' + params = self.default_pretrained_model_path + '.pdiparams' + cpu_config = Config(model, params) + cpu_config.disable_glog_info() + cpu_config.disable_gpu() + self.cpu_predictor = create_predictor(cpu_config) + + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + use_gpu = True + except: + use_gpu = False + if use_gpu: + gpu_config = Config(model, params) + gpu_config.disable_glog_info() + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) + self.gpu_predictor = create_predictor(gpu_config) + + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + """ + API for image classification. - @staticmethod - def _encode_block_string(block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters - ] - if 0 < block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + Args: + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR. + paths (list[str]): The paths of images. + batch_size (int): batch size. + use_gpu (bool): Whether to use gpu. + top_k (int): Return top k results. - @staticmethod - def decode(string_list: list): + Returns: + res (list[dict]): The classfication results. """ - Decode a list of string notations to specify blocks in the network. - - string_list: list of strings, each string is a notation of block - return - list of BlockArgs namedtuples of block args + if use_gpu: + try: + _places = os.environ["CUDA_VISIBLE_DEVICES"] + int(_places[0]) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
+ ) + + all_data = list() + for yield_data in reader(images, paths): + all_data.append(yield_data) + + total_num = len(all_data) + loop_num = int(np.ceil(total_num / batch_size)) + + res = list() + for iter_id in range(loop_num): + batch_data = list() + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + batch_data.append(all_data[handle_id + image_id]) + except: + pass + # feed batch image + batch_image = np.array([data['image'] for data in batch_data]) + + predictor = self.gpu_predictor if use_gpu else self.cpu_predictor + input_names = predictor.get_input_names() + input_handle = predictor.get_input_handle(input_names[0]) + input_handle.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + out = postprocess(data_out=output_handle.copy_to_cpu(), label_list=self.label_list, top_k=top_k) + res += out + return res + + @serving + def serving_method(self, images, **kwargs): """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(BlockDecoder._decode_block_string(block_string)) - return blocks_args - - @staticmethod - def encode(blocks_args: list): + Run as a service. """ - Encodes a list of BlockArgs to a list of strings. + images_decode = [base64_to_cv2(image) for image in images] + results = self.classify(images=images_decode, **kwargs) + return results - :param blocks_args: a list of BlockArgs namedtuples of block args - :return: a list of strings, each string is a notation of block + @runnable + def run_cmd(self, argvs): """ - block_strings = [] - for block in blocks_args: - block_strings.append(BlockDecoder._encode_block_string(block)) - return block_strings - - -def initial_type(name: str, use_bias: bool = False): - param_attr = ParamAttr(name=name + "_weights") - if use_bias: - bias_attr = ParamAttr(name=name + "_offset") - else: - bias_attr = False - return param_attr, bias_attr - - -def init_batch_norm_layer(name: str = "batch_norm"): - param_attr = ParamAttr(name=name + "_scale") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def init_fc_layer(name: str = "fc"): - param_attr = ParamAttr(name=name + "_weights") - bias_attr = ParamAttr(name=name + "_offset") - return param_attr, bias_attr - - -def cal_padding(img_size: int, stride: int, filter_size: int, dilation: int = 1): - """Calculate padding size.""" - if img_size % stride == 0: - out_size = max(filter_size - stride, 0) - else: - out_size = max(filter_size - (img_size % stride), 0) - return out_size // 2, out_size - out_size // 2 - - -inp_shape = {"b7": [600, 300, 300, 150, 75, 38, 38, 19]} - - -def _drop_connect(inputs: paddle.Tensor, prob: float, is_test: bool): - """Drop input connection""" - if is_test: - return inputs - keep_prob = 1.0 - prob - inputs_shape = paddle.shape(inputs) - random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) - binary_tensor = paddle.floor(random_tensor) - output = inputs / keep_prob * binary_tensor - return output - - -class Conv2ds(nn.Layer): - """Basic conv layer""" - - def __init__(self, - input_channels: int, - output_channels: int, - filter_size: int, - stride: int = 1, - padding: int = 0, - groups: int = None, - name: str = "conv2d", - act: str = None, - use_bias: bool = False, - padding_type: str = None, - model_name: str = None, - cur_stage: str = None): - super(Conv2ds, self).__init__() - assert act in [None, "swish", "sigmoid"] - 
self.act = act - - param_attr, bias_attr = initial_type(name=name, use_bias=use_bias) - - def get_padding(filter_size, stride=1, dilation=1): - padding = ((stride - 1) + dilation * (filter_size - 1)) // 2 - return padding - - inps = 1 if model_name == None and cur_stage == None else inp_shape[model_name][cur_stage] - self.need_crop = False - if padding_type == "SAME": - top_padding, bottom_padding = cal_padding(inps, stride, filter_size) - left_padding, right_padding = cal_padding(inps, stride, filter_size) - height_padding = bottom_padding - width_padding = right_padding - if top_padding != bottom_padding or left_padding != right_padding: - height_padding = top_padding + stride - width_padding = left_padding + stride - self.need_crop = True - padding = [height_padding, width_padding] - elif padding_type == "VALID": - height_padding = 0 - width_padding = 0 - padding = [height_padding, width_padding] - elif padding_type == "DYNAMIC": - padding = get_padding(filter_size, stride) - else: - padding = padding_type - - groups = 1 if groups is None else groups - self._conv = Conv2d( - input_channels, - output_channels, - filter_size, - groups=groups, - stride=stride, - padding=padding, - weight_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - x = self._conv(inputs) - if self.act == "swish": - x = F.swish(x) - elif self.act == "sigmoid": - x = F.sigmoid(x) - - if self.need_crop: - x = x[:, :, 1:, 1:] - return x - - -class ConvBNLayer(nn.Layer): - """Basic conv bn layer.""" - - def __init__(self, - input_channels: int, - filter_size: int, - output_channels: int, - stride: int = 1, - num_groups: int = 1, - padding_type: str = "SAME", - conv_act: str = None, - bn_act: str = "swish", - use_bn: bool = True, - use_bias: bool = False, - name: str = None, - conv_name: str = None, - bn_name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2ds( - input_channels=input_channels, - output_channels=output_channels, - filter_size=filter_size, - stride=stride, - groups=num_groups, - act=conv_act, - padding_type=padding_type, - name=conv_name, - use_bias=use_bias, - model_name=model_name, - cur_stage=cur_stage) - self.use_bn = use_bn - if use_bn is True: - bn_name = name + bn_name - param_attr, bias_attr = init_batch_norm_layer(bn_name) - - self._bn = BatchNorm( - num_channels=output_channels, - act=bn_act, - momentum=0.99, - epsilon=0.001, - moving_mean_name=bn_name + "_mean", - moving_variance_name=bn_name + "_variance", - param_attr=param_attr, - bias_attr=bias_attr) - - def forward(self, inputs: paddle.Tensor): - if self.use_bn: - x = self._conv(inputs) - x = self._bn(x) - return x - else: - return self._conv(inputs) - - -class ExpandConvNorm(nn.Layer): - """Expand conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ExpandConvNorm, self).__init__() - - self.oup = block_args.input_filters * block_args.expand_ratio - self.expand_ratio = block_args.expand_ratio - - if self.expand_ratio != 1: - self._conv = ConvBNLayer( - input_channels, - 1, - self.oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_expand_conv", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - if self.expand_ratio != 1: - return self._conv(inputs) - else: - return inputs - - -class 
DepthwiseConvNorm(nn.Layer): - """Depthwise conv norm layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(DepthwiseConvNorm, self).__init__() - - self.k = block_args.kernel_size - self.s = block_args.stride - if isinstance(self.s, list) or isinstance(self.s, tuple): - self.s = self.s[0] - oup = block_args.input_filters * block_args.expand_ratio - - self._conv = ConvBNLayer( - input_channels, - self.k, - oup, - self.s, - num_groups=input_channels, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_depthwise_conv", - bn_name="_bn1", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ProjectConvNorm(nn.Layer): - """Projection conv bn layer.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ProjectConvNorm, self).__init__() - - final_oup = block_args.output_filters - - self._conv = ConvBNLayer( - input_channels, - 1, - final_oup, - bn_act=None, - padding_type=padding_type, - name=name, - conv_name=name + "_project_conv", - bn_name="_bn2", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class SEBlock(nn.Layer): - """Basic Squeeze-and-Excitation block for Efficientnet.""" - - def __init__(self, - input_channels: int, - num_squeezed_channels: int, - oup: int, - padding_type: str, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(SEBlock, self).__init__() - - self._pool = AdaptiveAvgPool2d(1) - self._conv1 = Conv2ds( - input_channels, - num_squeezed_channels, - 1, - use_bias=True, - padding_type=padding_type, - act="swish", - name=name + "_se_reduce") - - self._conv2 = Conv2ds( - num_squeezed_channels, - oup, - 1, - act="sigmoid", - use_bias=True, - padding_type=padding_type, - name=name + "_se_expand") - - def forward(self, inputs: paddle.Tensor): - x = self._pool(inputs) - x = self._conv1(x) - x = self._conv2(x) - return paddle.multiply(inputs, x) - - -class MbConvBlock(nn.Layer): - """Mobile inverted bottleneck convolution for Efficientnet.""" - - def __init__(self, - input_channels: int, - block_args: dict, - padding_type: str, - use_se: bool, - name: str = None, - drop_connect_rate: float = None, - is_test: bool = False, - model_name: str = None, - cur_stage: str = None): - super(MbConvBlock, self).__init__() - - oup = block_args.input_filters * block_args.expand_ratio - self.block_args = block_args - self.has_se = use_se and (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1) - self.id_skip = block_args.id_skip - self.expand_ratio = block_args.expand_ratio - self.drop_connect_rate = drop_connect_rate - self.is_test = is_test - - if self.expand_ratio != 1: - self._ecn = ExpandConvNorm( - input_channels, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._dcn = DepthwiseConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - if self.has_se: - num_squeezed_channels = max(1, int(block_args.input_filters * block_args.se_ratio)) - self._se = SEBlock( - input_channels * block_args.expand_ratio, - num_squeezed_channels, - oup, - padding_type=padding_type, - 
name=name, - model_name=model_name, - cur_stage=cur_stage) - - self._pcn = ProjectConvNorm( - input_channels * block_args.expand_ratio, - block_args, - padding_type=padding_type, - name=name, - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - x = inputs - if self.expand_ratio != 1: - x = self._ecn(x) - x = F.swish(x) - x = self._dcn(x) - x = F.swish(x) - if self.has_se: - x = self._se(x) - x = self._pcn(x) - if self.id_skip and \ - self.block_args.stride == 1 and \ - self.block_args.input_filters == self.block_args.output_filters: - if self.drop_connect_rate: - x = _drop_connect(x, self.drop_connect_rate, self.is_test) - x = paddle.elementwise_add(x, inputs) - return x - - -class ConvStemNorm(nn.Layer): - """Basic conv stem norm block for extracting features.""" - - def __init__(self, - input_channels: int, - padding_type: str, - _global_params: dict, - name: str = None, - model_name: str = None, - cur_stage: str = None): - super(ConvStemNorm, self).__init__() - - output_channels = round_filters(32, _global_params) - self._conv = ConvBNLayer( - input_channels, - filter_size=3, - output_channels=output_channels, - stride=2, - bn_act=None, - padding_type=padding_type, - name="", - conv_name="_conv_stem", - bn_name="_bn0", - model_name=model_name, - cur_stage=cur_stage) - - def forward(self, inputs: paddle.Tensor): - return self._conv(inputs) - - -class ExtractFeatures(nn.Layer): - """Extract features.""" - - def __init__(self, - input_channels: int, - _block_args: dict, - _global_params: dict, - padding_type: str, - use_se: bool, - is_test: bool, - model_name: str = None): - super(ExtractFeatures, self).__init__() - - self._global_params = _global_params - - self._conv_stem = ConvStemNorm( - input_channels, - padding_type=padding_type, - _global_params=_global_params, - model_name=model_name, - cur_stage=0) - - self.block_args_copy = copy.deepcopy(_block_args) - idx = 0 - block_size = 0 - for block_arg in self.block_args_copy: - block_arg = block_arg._replace( - input_filters=round_filters(block_arg.input_filters, _global_params), - output_filters=round_filters(block_arg.output_filters, _global_params), - num_repeat=round_repeats(block_arg.num_repeat, _global_params)) - block_size += 1 - for _ in range(block_arg.num_repeat - 1): - block_size += 1 - - self.conv_seq = [] - cur_stage = 1 - for block_args in _block_args: - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, _global_params), - output_filters=round_filters(block_args.output_filters, _global_params), - num_repeat=round_repeats(block_args.num_repeat, _global_params)) - - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - - _mc_block = self.add_sublayer( - "_blocks." + str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args=block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - if block_args.num_repeat > 1: - block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) - for _ in range(block_args.num_repeat - 1): - drop_connect_rate = self._global_params.drop_connect_rate if not is_test else 0 - if drop_connect_rate: - drop_connect_rate *= float(idx) / block_size - _mc_block = self.add_sublayer( - "block." 
+ str(idx) + ".", - MbConvBlock( - block_args.input_filters, - block_args, - padding_type=padding_type, - use_se=use_se, - name="_blocks." + str(idx) + ".", - drop_connect_rate=drop_connect_rate, - model_name=model_name, - cur_stage=cur_stage)) - self.conv_seq.append(_mc_block) - idx += 1 - cur_stage += 1 - - def forward(self, inputs: paddle.Tensor): - x = self._conv_stem(inputs) - x = F.swish(x) - for _mc_block in self.conv_seq: - x = _mc_block(x) - return x - - -@moduleinfo( - name="efficientnetb7_imagenet", - type="cv/classification", - author="paddlepaddle", - author_email="", - summary="efficientnetb7_imagenet is a classification model, " - "this module is trained with Imagenet dataset.", - version="1.1.0", - meta=ImageClassifierModule) -class EfficientNet_B7(nn.Layer): - def __init__(self, - is_test: bool = False, - padding_type: str = "SAME", - override_params: dict = None, - use_se: bool = True, - class_dim: int = 1000, - load_checkpoint: str = None): - super(EfficientNet_B7, self).__init__() - - model_name = 'efficientnet-b7' - self.name = "b7" - self._block_args, self._global_params = get_model_params(model_name, override_params) - self.padding_type = padding_type - self.use_se = use_se - self.is_test = is_test - - self._ef = ExtractFeatures( - 3, - self._block_args, - self._global_params, - self.padding_type, - self.use_se, - self.is_test, - model_name=self.name) - - output_channels = round_filters(1280, self._global_params) - oup = 640 - - self._conv = ConvBNLayer( - oup, - 1, - output_channels, - bn_act="swish", - padding_type=self.padding_type, - name="", - conv_name="_conv_head", - bn_name="_bn1", - model_name=self.name, - cur_stage=7) - self._pool = AdaptiveAvgPool2d(1) - - if self._global_params.dropout_rate: - self._drop = Dropout(p=self._global_params.dropout_rate, mode="upscale_in_train") - - param_attr, bias_attr = init_fc_layer("_fc") - self._fc = Linear(output_channels, class_dim, weight_attr=param_attr, bias_attr=bias_attr) - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'efficientnet_b7_imagenet.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/image_classification/efficientnet_b7_imagenet.pdparams -O ' - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def forward(self, inputs: paddle.Tensor): - x = self._ef(inputs) - x = self._conv(x) - x = self._pool(x) - if self._global_params.dropout_rate: - x = self._drop(x) - x = paddle.squeeze(x, axis=[2, 3]) - x = self._fc(x) - return x + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.classify(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + return results + + def add_module_config_arg(self): + """ + Add the command config options. 
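+        Registered options: --use_gpu (bool), --batch_size (int) and --top_k (int).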
+ """ + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") + self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") + self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + + +if __name__ == '__main__': + b7 = EfficientNetB7ImageNet() + b7.context() + import cv2 + test_image = [cv2.imread('dog.jpeg')] + res = b7.classification(images=test_image) + print(res) + res = b7.classification(paths=['dog.jpeg']) + print(res) + res = b7.classification(images=test_image) + print(res) + res = b7.classify(images=test_image) + print(res) diff --git a/modules/image/classification/efficientnetb7_imagenet/processor.py b/modules/image/classification/efficientnetb7_imagenet/processor.py new file mode 100644 index 000000000..9cb22a1b7 --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/processor.py @@ -0,0 +1,65 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import base64 + +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +def softmax(x): + if len(x.shape) > 1: + tmp = np.max(x, axis=1) + x -= tmp.reshape((x.shape[0], 1)) + x = np.exp(x) + tmp = np.sum(x, axis=1) + x /= tmp.reshape((x.shape[0], 1)) + else: + tmp = np.max(x) + x -= tmp + x = np.exp(x) + tmp = np.sum(x) + x /= tmp + return x + + +def postprocess(data_out, label_list, top_k): + """ + Postprocess output of network, one image at a time. + + Args: + data_out (numpy.ndarray): output data of network. + label_list (list): list of label. + top_k (int): Return top k results. 
+ """ + output = [] + for result in data_out: + result_i = softmax(result) + output_i = {} + indexs = np.argsort(result_i)[::-1][0:top_k] + for index in indexs: + label = label_list[index].split(',')[0] + output_i[label] = float(result_i[index]) + output.append(output_i) + return output diff --git a/modules/image/classification/efficientnetb7_imagenet/test.py b/modules/image/classification/efficientnetb7_imagenet/test.py new file mode 100644 index 000000000..355bbf311 --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/test.py @@ -0,0 +1,63 @@ +import os +import shutil +import unittest + +import cv2 +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/brFsZ7qszSY/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8OHx8ZG9nfGVufDB8fHx8MTY2MzA1ODQ1MQ&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="efficientnetb7_imagenet") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('inference') + + def test_classification1(self): + results = self.module.classification(paths=['tests/test.jpg']) + data = results[0] + self.assertTrue('Cardigan' in data) + self.assertTrue(data['Cardigan'] > 0.5) + + def test_classification2(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')]) + data = results[0] + self.assertTrue('Cardigan' in data) + self.assertTrue(data['Cardigan'] > 0.5) + + def test_classification3(self): + results = self.module.classification(images=[cv2.imread('tests/test.jpg')], use_gpu=True) + data = results[0] + self.assertTrue('Cardigan' in data) + self.assertTrue(data['Cardigan'] > 0.5) + + def test_classification4(self): + self.assertRaises(AssertionError, self.module.classification, paths=['no.jpg']) + + def test_classification5(self): + self.assertRaises(TypeError, self.module.classification, images=['tests/test.jpg']) + + def test_save_inference_model(self): + self.module.save_inference_model('./inference/model') + + self.assertTrue(os.path.exists('./inference/model.pdmodel')) + self.assertTrue(os.path.exists('./inference/model.pdiparams')) + + +if __name__ == "__main__": + unittest.main() From 5e8b33d7c0ab81fd1ccf92d41d5cefd2372f2dbf Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Mon, 17 Oct 2022 17:46:15 +0800 Subject: [PATCH 109/117] fix a bug of tokenize on win (int32 -> int64) (#2036) * fix a bug of tokenize on win (int32 -> int64) * fix a bug of tokenize on win (int32 -> int64) --- .../text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py | 2 +- .../text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py | 2 +- .../disco_diffusion_clip_vitb32/clip/clip/utils.py | 2 +- .../disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py | 2 +- .../vit_b_16x/ernievil2/utils/utils.py | 2 +- modules/image/text_to_image/stable_diffusion/clip/clip/utils.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py index 53d5c4440..54345bd6a 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py +++ 
b/modules/image/text_to_image/disco_diffusion_clip_rn101/clip/clip/utils.py @@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result diff --git a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py index 979784682..0127c7cf2 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py +++ b/modules/image/text_to_image/disco_diffusion_clip_rn50/clip/clip/utils.py @@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result diff --git a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py index 8ea909142..ac43ecc3d 100755 --- a/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py +++ b/modules/image/text_to_image/disco_diffusion_clip_vitb32/clip/clip/utils.py @@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result diff --git a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py index 06607c51f..02c74b2fa 100755 --- a/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py +++ b/modules/image/text_to_image/disco_diffusion_cnclip_vitb16/cn_clip/clip/utils.py @@ -46,7 +46,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 64): for i, tokens in enumerate(all_tokens): assert len(tokens) <= context_length - result[i, :len(tokens)] = paddle.to_tensor(tokens) + result[i, :len(tokens)] = paddle.to_tensor(tokens, dtype='int64') return result diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py index e603c341d..e47f3445f 100755 --- a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/utils/utils.py @@ -49,7 +49,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 64): for i, tokens in enumerate(all_tokens): assert len(tokens) <= context_length - result[i, :len(tokens)] = paddle.to_tensor(tokens) + result[i, :len(tokens)] = paddle.to_tensor(tokens, dtype='int64') return result diff --git a/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py 
index b5d417144..d70c61da1 100755 --- a/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py +++ b/modules/image/text_to_image/stable_diffusion/clip/clip/utils.py @@ -59,7 +59,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result From 5dd04d1bce737aa94a34e1cff521dc3903ac12b8 Mon Sep 17 00:00:00 2001 From: chenjian Date: Mon, 17 Oct 2022 18:01:03 +0800 Subject: [PATCH 110/117] add stable_diffusion_waifu module (#2077) * add stable_diffusion_waifu module * fix clip tokenize --- .../clip/clip/utils.py | 2 +- .../clip/clip/utils.py | 2 +- .../stable_diffusion_waifu/LICENSE | 82 + .../stable_diffusion_waifu/README.md | 157 ++ .../stable_diffusion_waifu/clip/README.md | 2 + .../clip/clip/__init__.py | 1 + .../clip/clip/layers.py | 182 +++ .../stable_diffusion_waifu/clip/clip/model.py | 259 +++ .../clip/clip/simple_tokenizer.py | 135 ++ .../stable_diffusion_waifu/clip/clip/utils.py | 88 + .../diffusers/__init__.py | 20 + .../diffusers/configuration_utils.py | 312 ++++ .../diffusers/models/README.md | 11 + .../diffusers/models/__init__.py | 20 + .../diffusers/models/attention.py | 465 ++++++ .../diffusers/models/embeddings.py | 116 ++ .../diffusers/models/resnet.py | 515 ++++++ .../diffusers/models/unet_2d.py | 206 +++ .../diffusers/models/unet_2d_condition.py | 206 +++ .../diffusers/models/unet_blocks.py | 1428 +++++++++++++++++ .../diffusers/models/vae.py | 465 ++++++ .../diffusers/schedulers/README.md | 18 + .../diffusers/schedulers/__init__.py | 24 + .../diffusers/schedulers/scheduling_ddim.py | 182 +++ .../diffusers/schedulers/scheduling_ddpm.py | 191 +++ .../schedulers/scheduling_karras_ve.py | 124 ++ .../schedulers/scheduling_lms_discrete.py | 133 ++ .../diffusers/schedulers/scheduling_pndm.py | 258 +++ .../diffusers/schedulers/scheduling_sde_ve.py | 172 ++ .../diffusers/schedulers/scheduling_sde_vp.py | 59 + .../diffusers/schedulers/scheduling_utils.py | 102 ++ .../stable_diffusion_waifu/module.py | 367 +++++ .../stable_diffusion_waifu/requirements.txt | 8 + 33 files changed, 6310 insertions(+), 2 deletions(-) create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/LICENSE create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/README.md create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/README.md create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/clip/__init__.py create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/clip/layers.py create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/clip/model.py create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/clip/simple_tokenizer.py create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/clip/clip/utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/configuration_utils.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/__init__.py create mode 100644 
modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/attention.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/embeddings.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/resnet.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d_condition.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_blocks.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/vae.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/README.md create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/__init__.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddim.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddpm.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_karras_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_lms_discrete.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_pndm.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_ve.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_vp.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_utils.py create mode 100755 modules/image/text_to_image/stable_diffusion_waifu/module.py create mode 100644 modules/image/text_to_image/stable_diffusion_waifu/requirements.txt diff --git a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py index b5d417144..d70c61da1 100755 --- a/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py +++ b/modules/image/text_to_image/stable_diffusion_img2img/clip/clip/utils.py @@ -59,7 +59,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result diff --git a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py index b5d417144..d70c61da1 100755 --- a/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py +++ b/modules/image/text_to_image/stable_diffusion_inpainting/clip/clip/utils.py @@ -59,7 +59,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77): for i, tokens in enumerate(all_tokens): if len(tokens) > context_length: raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") - result[i, :len(tokens)] = paddle.Tensor(np.array(tokens)) + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') return result diff --git a/modules/image/text_to_image/stable_diffusion_waifu/LICENSE 
b/modules/image/text_to_image/stable_diffusion_waifu/LICENSE new file mode 100644 index 000000000..928aa738f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/LICENSE @@ -0,0 +1,82 @@ +Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors + +CreativeML Open RAIL-M +dated August 22, 2022 + +Section I: PREAMBLE + +Multimodal generative models are being widely adopted and used, and have the potential to transform the way artists, among other individuals, conceive and benefit from AI or ML technologies as a tool for content creation. + +Notwithstanding the current and potential benefits that these artifacts can bring to society at large, there are also concerns about potential misuses of them, either due to their technical limitations or ethical considerations. + +In short, this license strives for both the open and responsible downstream use of the accompanying model. When it comes to the open character, we took inspiration from open source permissive licenses regarding the grant of IP rights. Referring to the downstream responsible use, we added use-based restrictions not permitting the use of the Model in very specific scenarios, in order for the licensor to be able to enforce the license in case potential misuses of the Model may occur. At the same time, we strive to promote open and responsible research on generative models for art and content generation. + +Even though downstream derivative versions of the model could be released under different licensing terms, the latter will always have to include - at minimum - the same use-based restrictions as the ones in the original license (this license). We believe in the intersection between open and responsible AI development; thus, this License aims to strike a balance between both in order to enable responsible open-science in the field of AI. + +This License governs the use of the model (and its derivatives) and is informed by the model card associated with the model. + +NOW THEREFORE, You and Licensor agree as follows: + +1. Definitions + +- "License" means the terms and conditions for use, reproduction, and Distribution as defined in this document. +- "Data" means a collection of information and/or content extracted from the dataset used with the Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not licensed under this License. +- "Output" means the results of operating a Model as embodied in informational content resulting therefrom. +- "Model" means any accompanying machine-learning based assemblies (including checkpoints), consisting of learnt weights, parameters (including optimizer states), corresponding to the model architecture as embodied in the Complementary Material, that have been trained or tuned, in whole or in part on the Data, using the Complementary Material. +- "Derivatives of the Model" means all modifications to the Model, works based on the Model, or any other model which is created or initialized by transfer of patterns of the weights, parameters, activations or output of the Model, to the other model, in order to cause the other model to perform similarly to the Model, including - but not limited to - distillation methods entailing the use of intermediate data representations or methods based on the generation of synthetic data by the Model for training the other model. 
+- "Complementary Material" means the accompanying source code and scripts used to define, run, load, benchmark or evaluate the Model, and used to prepare data for training or evaluation, if any. This includes any accompanying documentation, tutorials, examples, etc, if any. +- "Distribution" means any transmission, reproduction, publication or other sharing of the Model or Derivatives of the Model to a third party, including providing the Model as a hosted service made available by electronic or other remote means - e.g. API-based or web access. +- "Licensor" means the copyright owner or entity authorized by the copyright owner that is granting the License, including the persons or entities that may have rights in the Model and/or distributing the Model. +- "You" (or "Your") means an individual or Legal Entity exercising permissions granted by this License and/or making use of the Model for whichever purpose and in any field of use, including usage of the Model in an end-use application - e.g. chatbot, translator, image generator. +- "Third Parties" means individuals or legal entities that are not under common control with Licensor or You. +- "Contribution" means any work of authorship, including the original version of the Model and any modifications or additions to that Model or Derivatives of the Model thereof, that is intentionally submitted to Licensor for inclusion in the Model by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Model, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." +- "Contributor" means Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Model. + +Section II: INTELLECTUAL PROPERTY RIGHTS + +Both copyright and patent grants apply to the Model, Derivatives of the Model and Complementary Material. The Model and Derivatives of the Model are subject to additional terms as described in Section III. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare, publicly display, publicly perform, sublicense, and distribute the Complementary Material, the Model, and Derivatives of the Model. +3. Grant of Patent License. Subject to the terms and conditions of this License and where and as applicable, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this paragraph) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Model and the Complementary Material, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Model to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Model and/or Complementary Material or a Contribution incorporated within the Model and/or Complementary Material constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for the Model and/or Work shall terminate as of the date such litigation is asserted or filed. + +Section III: CONDITIONS OF USAGE, DISTRIBUTION AND REDISTRIBUTION + +4. Distribution and Redistribution. You may host for Third Party remote access purposes (e.g. software-as-a-service), reproduce and distribute copies of the Model or Derivatives of the Model thereof in any medium, with or without modifications, provided that You meet the following conditions: +Use-based restrictions as referenced in paragraph 5 MUST be included as an enforceable provision by You in any type of legal agreement (e.g. a license) governing the use and/or distribution of the Model or Derivatives of the Model, and You shall give notice to subsequent users You Distribute to, that the Model or Derivatives of the Model are subject to paragraph 5. This provision does not apply to the use of Complementary Material. +You must give any Third Party recipients of the Model or Derivatives of the Model a copy of this License; +You must cause any modified files to carry prominent notices stating that You changed the files; +You must retain all copyright, patent, trademark, and attribution notices excluding those notices that do not pertain to any part of the Model, Derivatives of the Model. +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions - respecting paragraph 4.a. - for use, reproduction, or Distribution of Your modifications, or for any such Derivatives of the Model as a whole, provided Your use, reproduction, and Distribution of the Model otherwise complies with the conditions stated in this License. +5. Use-based restrictions. The restrictions set forth in Attachment A are considered Use-based restrictions. Therefore You cannot use the Model and the Derivatives of the Model for the specified restricted uses. You may use the Model subject to this License, including only for lawful purposes and in accordance with the License. Use may include creating any content with, finetuning, updating, running, training, evaluating and/or reparametrizing the Model. You shall require all of Your users who use the Model or a Derivative of the Model to comply with the terms of this paragraph (paragraph 5). +6. The Output You Generate. Except as set forth herein, Licensor claims no rights in the Output You generate using the Model. You are accountable for the Output you generate and its subsequent uses. No use of the output can contravene any provision as stated in the License. + +Section IV: OTHER PROVISIONS + +7. Updates and Runtime Restrictions. To the maximum extent permitted by law, Licensor reserves the right to restrict (remotely or otherwise) usage of the Model in violation of this License, update the Model through electronic means, or modify the Output of the Model based on updates. You shall undertake reasonable efforts to use the latest version of the Model. +8. Trademarks and related. 
Nothing in this License permits You to make use of Licensors’ trademarks, trade names, logos or to otherwise suggest endorsement or misrepresent the relationship between the parties; and any rights not expressly granted herein are reserved by the Licensors. +9. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Model and the Complementary Material (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Model, Derivatives of the Model, and the Complementary Material and assume any risks associated with Your exercise of permissions under this License. +10. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Model and the Complementary Material (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. +11. Accepting Warranty or Additional Liability. While redistributing the Model, Derivatives of the Model and the Complementary Material thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. +12. If any provision of this License is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein. 
+
+END OF TERMS AND CONDITIONS
+
+
+
+
+Attachment A
+
+Use Restrictions
+
+You agree not to use the Model or Derivatives of the Model:
+- In any way that violates any applicable national, federal, state, local or international law or regulation;
+- For the purpose of exploiting, harming or attempting to exploit or harm minors in any way;
+- To generate or disseminate verifiably false information and/or content with the purpose of harming others;
+- To generate or disseminate personal identifiable information that can be used to harm an individual;
+- To defame, disparage or otherwise harass others;
+- For fully automated decision making that adversely impacts an individual’s legal rights or otherwise creates or modifies a binding, enforceable obligation;
+- For any use intended to or which has the effect of discriminating against or harming individuals or groups based on online or offline social behavior or known or predicted personal or personality characteristics;
+- To exploit any of the vulnerabilities of a specific group of persons based on their age, social, physical or mental characteristics, in order to materially distort the behavior of a person pertaining to that group in a manner that causes or is likely to cause that person or another person physical or psychological harm;
+- For any use intended to or which has the effect of discriminating against individuals or groups based on legally protected characteristics or categories;
+- To provide medical advice and medical results interpretation;
+- To generate or disseminate information for the purpose to be used for administration of justice, law enforcement, immigration or asylum processes, such as predicting an individual will commit fraud/crime commitment (e.g. by text profiling, drawing causal relationships between assertions made in documents, indiscriminate and arbitrarily-targeted use).
\ No newline at end of file
diff --git a/modules/image/text_to_image/stable_diffusion_waifu/README.md b/modules/image/text_to_image/stable_diffusion_waifu/README.md
new file mode 100755
index 000000000..af25f220c
--- /dev/null
+++ b/modules/image/text_to_image/stable_diffusion_waifu/README.md
@@ -0,0 +1,157 @@
+# stable_diffusion_waifu
+
+|Model Name|stable_diffusion_waifu|
+| :--- | :---: |
+|Category|multimodal - text-to-image generation|
+|Network|CLIP Text Encoder+UNet+VAE|
+|Dataset|-|
+|Fine-tuning supported|No|
+|Model Size|4.0GB|
+|Latest update date|2022-10-17|
+|Data indicators|-|
+
+## I. Basic Information
+
+### Application Effect Display
+
+  - Input text: "Goku"
+
+  - Output image
+
+    (example output image omitted)
+
+  - Generation process
+
+    (generation-process GIF omitted)
+
+### Module Introduction
+
+Stable Diffusion is a latent diffusion model, a kind of generative model that obtains the image of interest by iteratively denoising and sampling from random noise, and it currently achieves remarkably impressive results. Compared with Disco Diffusion, Stable Diffusion runs the iterations in a lower-dimensional latent space instead of the original pixel space, which greatly reduces the memory and compute requirements; a desired image can be rendered within one minute on a V100. This module uses the pretrained weights of hakurei's [waifu-diffusion](https://huggingface.co/hakurei/waifu-diffusion) and can be used to generate anime-style cartoon characters.
+
+
+For more details, please refer to the paper: [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752)
+
+## II. Installation
+
+- ### 1. Environment dependencies
+
+  - paddlepaddle >= 2.0.0
+
+  - paddlehub >= 2.0.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)
+
+- ### 2. Installation
+
+  - ```shell
+    $ hub install stable_diffusion_waifu
+    ```
+  - If you encounter problems during installation, please refer to: [Windows installation guide](../../../../docs/docs_ch/get_start/windows_quickstart.md)
+    | [Linux installation guide](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [MacOS installation guide](../../../../docs/docs_ch/get_start/mac_quickstart.md)
+
+
+## III. Module API Prediction
+
+- ### 1. Command-line prediction
+
+  - ```shell
+    $ hub run stable_diffusion_waifu --text_prompts "Goku" --output_dir stable_diffusion_waifu_out
+    ```
+
+- ### 2. Prediction code example
+
+  - ```python
+    import paddlehub as hub
+
+    module = hub.Module(name="stable_diffusion_waifu")
+    text_prompts = ["Goku"]
+    # Generate images; by default they are saved under the stable_diffusion_waifu_out directory
+    # The returned da is a DocumentArray object that stores all results, including the final images and the intermediate results of the iterations
+    # The generated images can be post-processed, saved or analyzed by operating on the DocumentArray object
+    # You can set batch_size to generate several images per prompt at once
+    da = module.generate_image(text_prompts=text_prompts, batch_size=3, output_dir='./stable_diffusion_out/')
+    # Show all intermediate results
+    da[0].chunks[-1].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True)
+    # Save the whole generation process as an animated gif
+    da[0].chunks[-1].chunks.save_gif('stable_diffusion_waifu_out-merged-result.gif')
+    # da is indexed by prompt; da[0].chunks indexes the images generated for that prompt (several per prompt when batch_size is not 1)
+    # You can also display a single image in the same way, e.g. the generation process of image 0
+    da[0].chunks[0].chunks.plot_image_sprites(skip_empty=True, show_index=True, keep_aspect_ratio=True)
+    da[0].chunks[0].chunks.save_gif('stable_diffusion_waifu_out-image-0-result.gif')
+    ```
+
+- ### 3. API
+
+  - ```python
+    def generate_image(
+            text_prompts,
+            width_height: Optional[List[int]] = [512, 512],
+            seed: Optional[int] = None,
+            batch_size: Optional[int] = 1,
+            output_dir: Optional[str] = 'stable_diffusion_out'):
+    ```
+
+    - Text-to-image generation API that generates an image matching the text description.
+
+    - **Parameters**
+
+      - text_prompts(str): Input text describing the content of the desired image, e.g. the cartoon character Goku.
+      - width_height(Optional[List[int]]): Width and height of the output image; both must be multiples of 64. The larger the image, the longer the computation takes.
+      - seed(Optional[int]): Random seed. Since the default input is random Gaussian noise, different seeds give different initial inputs and therefore different final results; set this parameter to obtain different output images.
+      - batch_size(Optional[int]): Number of images generated at once for each prompt.
+      - output_dir(Optional[str]): Directory in which to save the output images, "stable_diffusion_out" by default.
+
+
+    - **Return**
+      - da(DocumentArray): A DocumentArray object containing `n_batches` Documents, each of which stores all intermediate results of the iteration process. See the [DocumentArray documentation](https://docarray.jina.ai/fundamentals/documentarray/index.html) for details.
+
+## IV. Server Deployment
+
+- PaddleHub Serving can deploy an online text-to-image generation service.
+
+- ### Step 1: Start PaddleHub Serving
+
+  - Run the startup command:
+  - ```shell
+    $ hub serving start -m stable_diffusion_waifu
+    ```
+
+  - This deploys an online text-to-image service API; the default port is 8866.
+
+  - **NOTE:** If you use GPU prediction, set the CUDA\_VISIBLE\_DEVICES environment variable before starting the service; otherwise there is no need to set it.
+
+- ### Step 2: Send a prediction request
+
+  - With the server configured, the few lines of code below send a prediction request and fetch the result. After deserialization, the returned result is the DocumentArray type described in the API above, and it can be handled in exactly the same way as the return value of generate_image.
+
+  - ```python
+    import requests
+    import json
+    import cv2
+    import base64
+    from docarray import DocumentArray
+
+    # Send an HTTP request
+    data = {'text_prompts': 'Goku'}
+    headers = {"Content-type": "application/json"}
+    url = "http://127.0.0.1:8866/predict/stable_diffusion_waifu"
+    r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 获取返回结果 + r.json()["results"] + da = DocumentArray.from_base64(r.json()["results"]) + # 保存结果图 + da[0].save_uri_to_file('stable_diffusion_waifu_out.png') + # 将生成过程保存为一个动态图gif + da[0].chunks[0].chunks.save_gif('stable_diffusion_waifu_out.gif') + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install stable_diffusion_waifu == 1.0.0 + ``` diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/README.md b/modules/image/text_to_image/stable_diffusion_waifu/clip/README.md new file mode 100755 index 000000000..9944794f8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/README.md @@ -0,0 +1,2 @@ +# OpenAI CLIP implemented in Paddle. +The original implementation repo is [ranchlai/clip.paddle](https://github.com/ranchlai/clip.paddle). We use this repo here for text encoder in stable diffusion. diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/__init__.py b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/__init__.py new file mode 100755 index 000000000..5657b56e6 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/__init__.py @@ -0,0 +1 @@ +from .utils import * diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/layers.py b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/layers.py new file mode 100755 index 000000000..286f35ab4 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/layers.py @@ -0,0 +1,182 @@ +from typing import Optional + +import paddle +import paddle.nn as nn +from paddle import Tensor +from paddle.nn import functional as F +from paddle.nn import Linear + +__all__ = ['ResidualAttentionBlock', 'AttentionPool2d', 'multi_head_attention_forward', 'MultiHeadAttention'] + + +def multi_head_attention_forward(x: Tensor, + num_heads: int, + q_proj: Linear, + k_proj: Linear, + v_proj: Linear, + c_proj: Linear, + attn_mask: Optional[Tensor] = None): + max_len, batch_size, emb_dim = x.shape + head_dim = emb_dim // num_heads + scaling = float(head_dim)**-0.5 + q = q_proj(x) # L, N, E + k = k_proj(x) # L, N, E + v = v_proj(x) # L, N, E + #k = k.con + v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2)) + + q = q * scaling + qk = paddle.bmm(q, k.transpose((0, 2, 1))) + if attn_mask is not None: + if attn_mask.ndim == 2: + attn_mask.unsqueeze_(0) + #assert str(attn_mask.dtype) == 'VarType.FP32' and attn_mask.ndim == 3 + assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len + qk += attn_mask + + qk = paddle.nn.functional.softmax(qk, axis=-1) + atten = paddle.bmm(qk, v) + atten = atten.transpose((1, 0, 2)) + atten = atten.reshape((max_len, batch_size, emb_dim)) + atten = c_proj(atten) + return atten + + +class MultiHeadAttention(nn.Layer): # without attention mask + + def __init__(self, emb_dim: int, num_heads: int): + super().__init__() + self.q_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.k_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.v_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.c_proj = nn.Linear(emb_dim, emb_dim, bias_attr=True) + self.head_dim = emb_dim // num_heads + self.emb_dim = emb_dim + self.num_heads = num_heads + assert self.head_dim * num_heads == emb_dim, "embed_dim must be divisible by 
num_heads" + #self.scaling = float(self.head_dim) ** -0.5 + + def forward(self, x, attn_mask=None): # x is in shape[max_len,batch_size,emb_dim] + + atten = multi_head_attention_forward(x, + self.num_heads, + self.q_proj, + self.k_proj, + self.v_proj, + self.c_proj, + attn_mask=attn_mask) + + return atten + + +class Identity(nn.Layer): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x + + +class Bottleneck(nn.Layer): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1): + super().__init__() + + # all conv layers have stride 1. an avgpool is performed after the second convolution when stride > 1 + self.conv1 = nn.Conv2D(inplanes, planes, 1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(planes) + + self.conv2 = nn.Conv2D(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(planes) + + self.avgpool = nn.AvgPool2D(stride) if stride > 1 else Identity() + + self.conv3 = nn.Conv2D(planes, planes * self.expansion, 1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(planes * self.expansion) + + self.relu = nn.ReLU() + self.downsample = None + self.stride = stride + + if stride > 1 or inplanes != planes * Bottleneck.expansion: + self.downsample = nn.Sequential( + ("-1", nn.AvgPool2D(stride)), + ("0", nn.Conv2D(inplanes, planes * self.expansion, 1, stride=1, bias_attr=False)), + ("1", nn.BatchNorm2D(planes * self.expansion))) + + def forward(self, x): + identity = x + + out = self.relu(self.bn1(self.conv1(x))) + out = self.relu(self.bn2(self.conv2(out))) + out = self.avgpool(out) + out = self.bn3(self.conv3(out)) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + + +class AttentionPool2d(nn.Layer): + + def __init__(self, spacial_dim: int, embed_dim: int, num_heads: int, output_dim: int = None): + super().__init__() + + self.positional_embedding = paddle.create_parameter((spacial_dim**2 + 1, embed_dim), dtype='float32') + + self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=True) + self.c_proj = nn.Linear(embed_dim, output_dim or embed_dim, bias_attr=True) + self.num_heads = num_heads + + self.head_dim = embed_dim // num_heads + assert self.head_dim * num_heads == embed_dim, "embed_dim must be divisible by num_heads" + + def forward(self, x): + + x = x.reshape((x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).transpose((2, 0, 1)) # NCHW -> (HW)NC + max_len, batch_size, emb_dim = x.shape + head_dim = self.head_dim + x = paddle.concat([paddle.mean(x, axis=0, keepdim=True), x], axis=0) + x = x + paddle.unsqueeze(self.positional_embedding, 1) + out = multi_head_attention_forward(x, self.num_heads, self.q_proj, self.k_proj, self.v_proj, self.c_proj) + + return out[0] + + +class QuickGELU(nn.Layer): + + def forward(self, x): + return x * paddle.nn.functional.sigmoid(1.702 * x) + + +class ResidualAttentionBlock(nn.Layer): + + def __init__(self, d_model: int, n_head: int, attn_mask=None): + super().__init__() + + self.attn = MultiHeadAttention(d_model, n_head) + self.ln_1 = nn.LayerNorm(d_model) + self.mlp = nn.Sequential(("c_fc", nn.Linear(d_model, d_model * 4)), ("gelu", QuickGELU()), + ("c_proj", nn.Linear(d_model * 4, d_model))) + self.ln_2 = nn.LayerNorm(d_model) + self.attn_mask = attn_mask + + def attention(self, x): + x = self.attn(x, self.attn_mask) + assert isinstance(x, paddle.Tensor) # not tuble here + return x + + def 
forward(self, x): + + x = x + self.attention(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/model.py b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/model.py new file mode 100755 index 000000000..06affcc4b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/model.py @@ -0,0 +1,259 @@ +from typing import Tuple +from typing import Union + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import nn + +from .layers import AttentionPool2d +from .layers import Bottleneck +from .layers import MultiHeadAttention +from .layers import ResidualAttentionBlock + + +class ModifiedResNet(nn.Layer): + """ + A ResNet class that is similar to torchvision's but contains the following changes: + - There are now 3 "stem" convolutions as opposed to 1, with an average pool instead of a max pool. + - Performs anti-aliasing strided convolutions, where an avgpool is prepended to convolutions with stride > 1 + - The final pooling layer is a QKV attention instead of an average pool + """ + + def __init__(self, layers, output_dim, heads, input_resolution=224, width=64): + super().__init__() + self.output_dim = output_dim + self.input_resolution = input_resolution + + # the 3-layer stem + self.conv1 = nn.Conv2D(3, width // 2, kernel_size=3, stride=2, padding=1, bias_attr=False) + self.bn1 = nn.BatchNorm2D(width // 2) + self.conv2 = nn.Conv2D(width // 2, width // 2, kernel_size=3, padding=1, bias_attr=False) + self.bn2 = nn.BatchNorm2D(width // 2) + self.conv3 = nn.Conv2D(width // 2, width, kernel_size=3, padding=1, bias_attr=False) + self.bn3 = nn.BatchNorm2D(width) + self.avgpool = nn.AvgPool2D(2) + self.relu = nn.ReLU() + + # residual layers + self._inplanes = width # this is a *mutable* variable used during construction + self.layer1 = self._make_layer(width, layers[0]) + self.layer2 = self._make_layer(width * 2, layers[1], stride=2) + self.layer3 = self._make_layer(width * 4, layers[2], stride=2) + self.layer4 = self._make_layer(width * 8, layers[3], stride=2) + + embed_dim = width * 32 # the ResNet feature dimension + self.attnpool = AttentionPool2d(input_resolution // 32, embed_dim, heads, output_dim) + + def _make_layer(self, planes, blocks, stride=1): + layers = [Bottleneck(self._inplanes, planes, stride)] + + self._inplanes = planes * Bottleneck.expansion + for _ in range(1, blocks): + layers.append(Bottleneck(self._inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + + def stem(x): + for conv, bn in [(self.conv1, self.bn1), (self.conv2, self.bn2), (self.conv3, self.bn3)]: + x = self.relu(bn(conv(x))) + x = self.avgpool(x) + return x + + #x = x.type(self.conv1.weight.dtype) + x = stem(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.attnpool(x) + + return x + + +class Transformer(nn.Layer): + + def __init__(self, width: int, layers: int, heads: int, attn_mask=None): + super().__init__() + self.width = width + self.layers = layers + self.resblocks = nn.Sequential(*[ResidualAttentionBlock(width, heads, attn_mask) for _ in range(layers)]) + + def forward(self, x): + return self.resblocks(x) + + +class VisualTransformer(nn.Layer): + + def __init__(self, input_resolution: int, patch_size: int, width: int, layers: int, heads: int, output_dim: int): + super().__init__() + self.input_resolution = input_resolution + self.output_dim = output_dim + # used patch_size x patch_size, 
stride patch_size to do linear projection + self.conv1 = nn.Conv2D(in_channels=3, + out_channels=width, + kernel_size=patch_size, + stride=patch_size, + bias_attr=False) + + # scale = width ** -0.5 + self.class_embedding = paddle.create_parameter((width, ), 'float32') + + self.positional_embedding = paddle.create_parameter(((input_resolution // patch_size)**2 + 1, width), 'float32') + + self.ln_pre = nn.LayerNorm(width) + + self.transformer = Transformer(width, layers, heads) + + self.ln_post = nn.LayerNorm(width) + self.proj = paddle.create_parameter((width, output_dim), 'float32') + + def forward(self, x): + + x = self.conv1(x) + x = x.reshape((x.shape[0], x.shape[1], -1)) + x = x.transpose((0, 2, 1)) + x = paddle.concat([self.class_embedding + paddle.zeros((x.shape[0], 1, x.shape[-1]), dtype=x.dtype), x], axis=1) + + x = x + self.positional_embedding + x = self.ln_pre(x) + x = x.transpose((1, 0, 2)) + x = self.transformer(x) + x = x.transpose((1, 0, 2)) + x = self.ln_post(x[:, 0, :]) + if self.proj is not None: + x = paddle.matmul(x, self.proj) + + return x + + +class TextTransformer(nn.Layer): + + def __init__(self, context_length: int, vocab_size: int, transformer_width: int, transformer_heads: int, + transformer_layers: int): + super().__init__() + self.context_length = context_length + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def forward(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + return x + + +class CLIP(nn.Layer): + + def __init__( + self, + embed_dim: int, + # vision + image_resolution: int, + vision_layers: Union[Tuple[int, int, int, int], int], + vision_width: int, + vision_patch_size: int, + # text + context_length: int, + vocab_size: int, + transformer_width: int, + transformer_heads: int, + transformer_layers: int): + super().__init__() + + self.context_length = context_length + if isinstance(vision_layers, (tuple, list)): + vision_heads = vision_width * 32 // 64 + self.visual = ModifiedResNet(layers=vision_layers, + output_dim=embed_dim, + heads=vision_heads, + input_resolution=image_resolution, + width=vision_width) + else: + vision_heads = vision_width // 64 + self.visual = VisualTransformer(input_resolution=image_resolution, + patch_size=vision_patch_size, + width=vision_width, + layers=vision_layers, + heads=vision_heads, + output_dim=embed_dim) + + self.transformer = Transformer(width=transformer_width, + layers=transformer_layers, + heads=transformer_heads, + attn_mask=self.build_attention_mask()) + + self.vocab_size = vocab_size + self.token_embedding = nn.Embedding(vocab_size, 
transformer_width) + self.positional_embedding = paddle.create_parameter((self.context_length, transformer_width), 'float32') + self.ln_final = nn.LayerNorm(transformer_width) + + self.text_projection = paddle.create_parameter((transformer_width, embed_dim), 'float32') + self.logit_scale = paddle.create_parameter((1, ), 'float32') + + def build_attention_mask(self): + # lazily create causal attention mask, with full attention between the vision tokens + # mask = paddle.empty((self.context_length, self.context_length),dtype='float32') + # mask.fill_(float("-inf")) + #mask.triu_(1) # zero out the lower diagonal + + mask = paddle.ones((self.context_length, self.context_length)) * float("-inf") + mask = paddle.triu(mask, diagonal=1) + + return mask + + def encode_image(self, image): + return self.visual(image) + + def encode_text(self, text): + x = self.token_embedding(text) # [batch_size, n_ctx, d_model] + x = x + self.positional_embedding + x = x.transpose((1, 0, 2)) # NLD -> LND + x = self.transformer(x) + x = x.transpose((1, 0, 2)) # LND -> NLD + x = self.ln_final(x) + idx = text.numpy().argmax(-1) + idx = list(idx) + x = [x[i:i + 1, int(j), :] for i, j in enumerate(idx)] + x = paddle.concat(x, 0) + x = paddle.matmul(x, self.text_projection) + return x + + def forward(self, image, text): + image_features = self.encode_image(image) + text_features = self.encode_text(text) + + # normalized features + image_features = image_features / image_features.norm(dim=-1, keepdim=True) + text_features = text_features / text_features.norm(dim=-1, keepdim=True) + + # cosine similarity as logits + logit_scale = self.logit_scale.exp() + logits_per_image = paddle.matmul(logit_scale * image_features, text_features.t()) + logits_per_text = paddle.matmul(logit_scale * text_features, image_features.t()) + + # shape = [global_batch_size, global_batch_size] + return logits_per_image, logits_per_text diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/simple_tokenizer.py b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/simple_tokenizer.py new file mode 100755 index 000000000..4eaf82e9e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/simple_tokenizer.py @@ -0,0 +1,135 @@ +import gzip +import html +import os +from functools import lru_cache + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join(os.path.dirname(os.path.abspath(__file__)), "../assets/bpe_simple_vocab_16e6.txt.gz") + + +@lru_cache() +def bytes_to_unicode(): + """ + Returns list of utf-8 byte and a corresponding list of unicode strings. + The reversible bpe codes work on unicode strings. + This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. + When you're at something like a 10B token dataset you end up needing around 5K for decent coverage. + This is a signficant percentage of your normal, say, 32K bpe vocab. + To avoid that, we want lookup tables between utf-8 bytes and unicode strings. + And avoids mapping to whitespace/control characters the bpe code barfs on. + """ + bs = list(range(ord("!"), ord("~") + 1)) + list(range(ord("¡"), ord("¬") + 1)) + list(range(ord("®"), ord("ÿ") + 1)) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + Word is represented as tuple of symbols (symbols being variable-length strings). 
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode("utf-8").split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'} + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE) + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '', ) + pairs = get_pairs(word) + + if not pairs: + return token + '' + + while True: + bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('', ' ') + return text diff --git a/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/utils.py b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/utils.py new file mode 100755 index 000000000..d70c61da1 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/clip/clip/utils.py @@ -0,0 +1,88 @@ +import os +from typing import List +from typing import Union + +import numpy as np +import paddle +from paddle.utils import download +from paddle.vision.transforms import CenterCrop +from paddle.vision.transforms import Compose +from paddle.vision.transforms import Normalize +from paddle.vision.transforms import Resize +from paddle.vision.transforms import ToTensor + +from .model import CLIP +from .model import TextTransformer +from .simple_tokenizer import SimpleTokenizer + +__all__ = ['transform', 'tokenize', 'build_model'] + 
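+# NOTE: Stable Diffusion only needs CLIP's text encoder for conditioning (see clip/README.md),
+# so `build_model('VITL14')` below constructs just the TextTransformer (77-token context,
+# 49408-token vocabulary, width 768, 12 heads, 12 layers) and loads its weights from the
+# local `pre_trained/vitl14_textencoder.pdparams` checkpoint referenced in URL.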
+MODEL_NAMES = ['VITL14'] + +URL = {'VITL14': os.path.join(os.path.dirname(__file__), 'pre_trained', 'vitl14_textencoder.pdparams')} + +MEAN, STD = (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711) +_tokenizer = SimpleTokenizer() + +transform = Compose([ + Resize(224, interpolation='bicubic'), + CenterCrop(224), lambda image: image.convert('RGB'), + ToTensor(), + Normalize(mean=MEAN, std=STD), lambda t: t.unsqueeze_(0) +]) + + +def tokenize(texts: Union[str, List[str]], context_length: int = 77): + """ + Returns the tokenized representation of given input string(s) + + Parameters + ---------- + texts : Union[str, List[str]] + An input string or a list of input strings to tokenize + + context_length : int + The context length to use; all CLIP models use 77 as the context length + + Returns + ------- + A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length] + """ + if isinstance(texts, str): + texts = [texts] + + sot_token = _tokenizer.encoder["<|startoftext|>"] + eot_token = _tokenizer.encoder["<|endoftext|>"] + all_tokens = [[sot_token] + _tokenizer.encode(text) + [eot_token] for text in texts] + result = paddle.zeros((len(all_tokens), context_length), dtype='int64') + + for i, tokens in enumerate(all_tokens): + if len(tokens) > context_length: + raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}") + result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64') + + return result + + +def build_model(name='VITL14'): + assert name in MODEL_NAMES, f"model name must be one of {MODEL_NAMES}" + name2model = {'VITL14': build_vitl14_language_model} + model = name2model[name]() + weight = URL[name] + sd = paddle.load(weight) + state_dict = model.state_dict() + for key, value in sd.items(): + if key in state_dict: + state_dict[key] = value + model.load_dict(state_dict) + model.eval() + return model + + +def build_vitl14_language_model(): + model = TextTransformer(context_length=77, + vocab_size=49408, + transformer_width=768, + transformer_heads=12, + transformer_layers=12) + return model diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/__init__.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/__init__.py new file mode 100644 index 000000000..7f41816d7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
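+
+# NOTE: this package is a Paddle port of a subset of HuggingFace `diffusers` (v0.2.4),
+# bundled with the stable_diffusion_waifu module; it re-exports the model classes
+# (VAE / UNet variants) and the schedulers listed in the imports below.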
+ +__version__ = "0.2.4" + +from .models import AutoencoderKL, UNet2DConditionModel, UNet2DModel, VQModel + +from .schedulers import (DDIMScheduler, DDPMScheduler, KarrasVeScheduler, PNDMScheduler, SchedulerMixin, + ScoreSdeVeScheduler, LMSDiscreteScheduler) diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/configuration_utils.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/configuration_utils.py new file mode 100644 index 000000000..c90ebd5be --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/configuration_utils.py @@ -0,0 +1,312 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" ConfigMixinuration base class and utilities.""" +import functools +import inspect +import json +import os +import re +from collections import OrderedDict +from typing import Any +from typing import Dict +from typing import Tuple +from typing import Union + +from requests import HTTPError + +from paddlehub.common.logger import logger + +HUGGINGFACE_CO_RESOLVE_ENDPOINT = "HUGGINGFACE_CO_RESOLVE_ENDPOINT" +DIFFUSERS_CACHE = "./caches" + +_re_configuration_file = re.compile(r"config\.(.*)\.json") + + +class ConfigMixin: + r""" + Base class for all configuration classes. Handles a few parameters common to all models' configurations as well as + methods for loading/downloading/saving configurations. + + """ + config_name = "model_config.json" + ignore_for_config = [] + + def register_to_config(self, **kwargs): + if self.config_name is None: + raise NotImplementedError(f"Make sure that {self.__class__} has defined a class name `config_name`") + kwargs["_class_name"] = self.__class__.__name__ + kwargs["_diffusers_version"] = "0.0.1" + + for key, value in kwargs.items(): + try: + setattr(self, key, value) + except AttributeError as err: + logger.error(f"Can't set {key} with value {value} for {self}") + raise err + + if not hasattr(self, "_internal_dict"): + internal_dict = kwargs + else: + previous_dict = dict(self._internal_dict) + internal_dict = {**self._internal_dict, **kwargs} + logger.debug(f"Updating config from {previous_dict} to {internal_dict}") + + self._internal_dict = FrozenDict(internal_dict) + + def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool = False, **kwargs): + """ + Save a configuration object to the directory `save_directory`, so that it can be re-loaded using the + [`~ConfigMixin.from_config`] class method. + + Args: + save_directory (`str` or `os.PathLike`): + Directory where the configuration JSON file will be saved (will be created if it does not exist). + kwargs: + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. 
+ """ + if os.path.isfile(save_directory): + raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file") + + os.makedirs(save_directory, exist_ok=True) + + # If we save using the predefined names, we can load using `from_config` + output_config_file = os.path.join(save_directory, self.config_name) + + self.to_json_file(output_config_file) + logger.info(f"ConfigMixinuration saved in {output_config_file}") + + @classmethod + def from_config(cls, pretrained_model_name_or_path: Union[str, os.PathLike], return_unused_kwargs=False, **kwargs): + config_dict = cls.get_config_dict(pretrained_model_name_or_path=pretrained_model_name_or_path, **kwargs) + + init_dict, unused_kwargs = cls.extract_init_dict(config_dict, **kwargs) + + model = cls(**init_dict) + + if return_unused_kwargs: + return model, unused_kwargs + else: + return model + + @classmethod + def get_config_dict(cls, pretrained_model_name_or_path: Union[str, os.PathLike], + **kwargs) -> Tuple[Dict[str, Any], Dict[str, Any]]: + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + force_download = kwargs.pop("force_download", False) + resume_download = kwargs.pop("resume_download", False) + proxies = kwargs.pop("proxies", None) + use_auth_token = kwargs.pop("use_auth_token", None) + local_files_only = kwargs.pop("local_files_only", False) + revision = kwargs.pop("revision", None) + subfolder = kwargs.pop("subfolder", None) + + user_agent = {"file_type": "config"} + + pretrained_model_name_or_path = str(pretrained_model_name_or_path) + + if cls.config_name is None: + raise ValueError( + "`self.config_name` is not defined. Note that one should not load a config from " + "`ConfigMixin`. Please make sure to define `config_name` in a class inheriting from `ConfigMixin`") + + if os.path.isfile(pretrained_model_name_or_path): + config_file = pretrained_model_name_or_path + elif os.path.isdir(pretrained_model_name_or_path): + if os.path.isfile(os.path.join(pretrained_model_name_or_path, cls.config_name)): + # Load from a PyTorch checkpoint + config_file = os.path.join(pretrained_model_name_or_path, cls.config_name) + elif subfolder is not None and os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name)): + config_file = os.path.join(pretrained_model_name_or_path, subfolder, cls.config_name) + else: + raise EnvironmentError( + f"Error no file named {cls.config_name} found in directory {pretrained_model_name_or_path}.") + else: + try: + # Load from URL or cache if already cached + from huggingface_hub import hf_hub_download + config_file = hf_hub_download( + pretrained_model_name_or_path, + filename=cls.config_name, + cache_dir=cache_dir, + force_download=force_download, + proxies=proxies, + resume_download=resume_download, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + user_agent=user_agent, + subfolder=subfolder, + ) + + except HTTPError as err: + raise EnvironmentError("There was a specific connection error when trying to load" + f" {pretrained_model_name_or_path}:\n{err}") + except ValueError: + raise EnvironmentError( + f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it" + f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a" + f" directory containing a {cls.config_name} file.\nCheckout your internet connection or see how to" + " run the library in offline mode at" + " 'https://huggingface.co/docs/diffusers/installation#offline-mode'.") + except EnvironmentError: + 
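+                # Any other environment error is re-raised with a user-facing hint that the name may be
+                # a mistyped local path or a Hugging Face Hub repo id without the expected config file.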
raise EnvironmentError( + f"Can't load config for '{pretrained_model_name_or_path}'. If you were trying to load it from " + "'https://huggingface.co/models', make sure you don't have a local directory with the same name. " + f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory " + f"containing a {cls.config_name} file") + + try: + # Load config dict + config_dict = cls._dict_from_json_file(config_file) + except (json.JSONDecodeError, UnicodeDecodeError): + raise EnvironmentError(f"It looks like the config file at '{config_file}' is not a valid JSON file.") + + return config_dict + + @classmethod + def extract_init_dict(cls, config_dict, **kwargs): + expected_keys = set(dict(inspect.signature(cls.__init__).parameters).keys()) + expected_keys.remove("self") + # remove general kwargs if present in dict + if "kwargs" in expected_keys: + expected_keys.remove("kwargs") + # remove keys to be ignored + if len(cls.ignore_for_config) > 0: + expected_keys = expected_keys - set(cls.ignore_for_config) + init_dict = {} + for key in expected_keys: + if key in kwargs: + # overwrite key + init_dict[key] = kwargs.pop(key) + elif key in config_dict: + # use value from config dict + init_dict[key] = config_dict.pop(key) + + unused_kwargs = config_dict.update(kwargs) + + passed_keys = set(init_dict.keys()) + if len(expected_keys - passed_keys) > 0: + logger.warning( + f"{expected_keys - passed_keys} was not found in config. Values will be initialized to default values.") + + return init_dict, unused_kwargs + + @classmethod + def _dict_from_json_file(cls, json_file: Union[str, os.PathLike]): + with open(json_file, "r", encoding="utf-8") as reader: + text = reader.read() + return json.loads(text) + + def __repr__(self): + return f"{self.__class__.__name__} {self.to_json_string()}" + + @property + def config(self) -> Dict[str, Any]: + return self._internal_dict + + def to_json_string(self) -> str: + """ + Serializes this instance to a JSON string. + + Returns: + `str`: String containing all the attributes that make up this configuration instance in JSON format. + """ + config_dict = self._internal_dict if hasattr(self, "_internal_dict") else {} + return json.dumps(config_dict, indent=2, sort_keys=True) + "\n" + + def to_json_file(self, json_file_path: Union[str, os.PathLike]): + """ + Save this instance to a JSON file. + + Args: + json_file_path (`str` or `os.PathLike`): + Path to the JSON file in which this configuration instance's parameters will be saved. 
+ """ + with open(json_file_path, "w", encoding="utf-8") as writer: + writer.write(self.to_json_string()) + + +class FrozenDict(OrderedDict): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + for key, value in self.items(): + setattr(self, key, value) + + self.__frozen = True + + def __delitem__(self, *args, **kwargs): + raise Exception(f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance.") + + def setdefault(self, *args, **kwargs): + raise Exception(f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance.") + + def pop(self, *args, **kwargs): + raise Exception(f"You cannot use ``pop`` on a {self.__class__.__name__} instance.") + + def update(self, *args, **kwargs): + raise Exception(f"You cannot use ``update`` on a {self.__class__.__name__} instance.") + + def __setattr__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setattr__(name, value) + + def __setitem__(self, name, value): + if hasattr(self, "__frozen") and self.__frozen: + raise Exception(f"You cannot use ``__setattr__`` on a {self.__class__.__name__} instance.") + super().__setitem__(name, value) + + +def register_to_config(init): + """ + Decorator to apply on the init of classes inheriting from `ConfigMixin` so that all the arguments are automatically + sent to `self.register_for_config`. To ignore a specific argument accepted by the init but that shouldn't be + registered in the config, use the `ignore_for_config` class variable + + Warning: Once decorated, all private arguments (beginning with an underscore) are trashed and not sent to the init! + """ + + @functools.wraps(init) + def inner_init(self, *args, **kwargs): + # Ignore private kwargs in the init. + init_kwargs = {k: v for k, v in kwargs.items() if not k.startswith("_")} + init(self, *args, **init_kwargs) + if not isinstance(self, ConfigMixin): + raise RuntimeError( + f"`@register_for_config` was applied to {self.__class__.__name__} init method, but this class does " + "not inherit from `ConfigMixin`.") + + ignore = getattr(self, "ignore_for_config", []) + # Get positional arguments aligned with kwargs + new_kwargs = {} + signature = inspect.signature(init) + parameters = { + name: p.default + for i, (name, p) in enumerate(signature.parameters.items()) if i > 0 and name not in ignore + } + for arg, name in zip(args, parameters.keys()): + new_kwargs[name] = arg + + # Then add all kwargs + new_kwargs.update({ + k: init_kwargs.get(k, default) + for k, default in parameters.items() if k not in ignore and k not in new_kwargs + }) + getattr(self, "register_to_config")(**new_kwargs) + + return inner_init diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/README.md b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/README.md new file mode 100644 index 000000000..e786fe518 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/README.md @@ -0,0 +1,11 @@ +# Models + +- Models: Neural network that models $p_\theta(\mathbf{x}_{t-1}|\mathbf{x}_t)$ (see image below) and is trained end-to-end to denoise a noisy input to an image. 
Examples: UNet, Conditioned UNet, 3D UNet, Transformer UNet + +## API + +TODO(Suraj, Patrick) + +## Examples + +TODO(Suraj, Patrick) diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/__init__.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/__init__.py new file mode 100644 index 000000000..f55cc88a8 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/__init__.py @@ -0,0 +1,20 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .unet_2d import UNet2DModel +from .unet_2d_condition import UNet2DConditionModel +from .vae import AutoencoderKL +from .vae import VQModel diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/attention.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/attention.py new file mode 100644 index 000000000..29d0e73a7 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/attention.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from inspect import isfunction + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def finfo(dtype): + if dtype == paddle.float32: + return np.finfo(np.float32) + if dtype == paddle.float16: + return np.finfo(np.float16) + if dtype == paddle.float64: + return np.finfo(np.float64) + + +paddle.finfo = finfo + + +class AttentionBlockNew(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. Originally ported from here, but adapted + to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ Uses three q, k, v linear layers to compute attention + """ + + def __init__( + self, + channels, + num_head_channels=None, + num_groups=32, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + + self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 + self.num_head_size = num_head_channels + self.group_norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + + # define q,k,v as linear layers + self.query = nn.Linear(channels, channels) + self.key = nn.Linear(channels, channels) + self.value = nn.Linear(channels, channels) + + self.rescale_output_factor = rescale_output_factor + self.proj_attn = nn.Linear(channels, channels) + + def transpose_for_scores(self, projection: paddle.Tensor) -> paddle.Tensor: + new_projection_shape = projection.shape[:-1] + [self.num_heads, -1] + # move heads to 2nd position (B, T, H * D) -> (B, T, H, D) -> (B, H, T, D) + new_projection = projection.reshape(new_projection_shape).transpose([0, 2, 1, 3]) + return new_projection + + def forward(self, hidden_states): + residual = hidden_states + batch, channel, height, width = hidden_states.shape + + # norm + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.reshape([batch, channel, height * width]).transpose([0, 2, 1]) + + # proj to q, k, v + query_proj = self.query(hidden_states) + key_proj = self.key(hidden_states) + value_proj = self.value(hidden_states) + + # transpose + query_states = self.transpose_for_scores(query_proj) + key_states = self.transpose_for_scores(key_proj) + value_states = self.transpose_for_scores(value_proj) + + # get scores + scale = 1 / math.sqrt(math.sqrt(self.channels / self.num_heads)) + attention_scores = paddle.matmul(query_states * scale, key_states * scale, transpose_y=True) + attention_probs = F.softmax(attention_scores.astype("float32"), axis=-1).astype(attention_scores.dtype) + + # compute attention output + context_states = paddle.matmul(attention_probs, value_states) + + context_states = context_states.transpose([0, 2, 1, 3]) + new_context_states_shape = context_states.shape[:-2] + [ + self.channels, + ] + context_states = context_states.reshape(new_context_states_shape) + + # compute next hidden_states + hidden_states = self.proj_attn(context_states) + hidden_states = hidden_states.transpose([0, 2, 1]).reshape([batch, channel, height, width]) + + # res connect and rescale + hidden_states = (hidden_states + residual) / self.rescale_output_factor + return hidden_states + + def set_weight(self, attn_layer): + self.group_norm.weight.set_value(attn_layer.norm.weight) + self.group_norm.bias.set_value(attn_layer.norm.bias) + + if hasattr(attn_layer, "q"): + self.query.weight.set_value(attn_layer.q.weight[:, :, 0, 0]) + self.key.weight.set_value(attn_layer.k.weight[:, :, 0, 0]) + self.value.weight.set_value(attn_layer.v.weight[:, :, 0, 0]) + + self.query.bias.set_value(attn_layer.q.bias) + self.key.bias.set_value(attn_layer.k.bias) + self.value.bias.set_value(attn_layer.v.bias) + + self.proj_attn.weight.set_value(attn_layer.proj_out.weight[:, :, 0, 0]) + self.proj_attn.bias.set_value(attn_layer.proj_out.bias) + elif hasattr(attn_layer, "NIN_0"): + self.query.weight.set_value(attn_layer.NIN_0.W.t()) + self.key.weight.set_value(attn_layer.NIN_1.W.t()) + self.value.weight.set_value(attn_layer.NIN_2.W.t()) + + self.query.bias.set_value(attn_layer.NIN_0.b) + self.key.bias.set_value(attn_layer.NIN_1.b) + self.value.bias.set_value(attn_layer.NIN_2.b) + + 
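+            # NIN_0..NIN_2 provided the q/k/v weights above; NIN_3 supplies the output projection,
+            # and the GroupNorm_0 statistics are copied into group_norm below.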
self.proj_attn.weight.set_value(attn_layer.NIN_3.W.t()) + self.proj_attn.bias.set_value(attn_layer.NIN_3.b) + + self.group_norm.weight.set_value(attn_layer.GroupNorm_0.weight) + self.group_norm.bias.set_value(attn_layer.GroupNorm_0.bias) + else: + qkv_weight = attn_layer.qkv.weight.reshape( + [self.num_heads, 3 * self.channels // self.num_heads, self.channels]) + qkv_bias = attn_layer.qkv.bias.reshape([self.num_heads, 3 * self.channels // self.num_heads]) + + q_w, k_w, v_w = qkv_weight.split(self.channels // self.num_heads, axis=1) + q_b, k_b, v_b = qkv_bias.split(self.channels // self.num_heads, axis=1) + + self.query.weight.set_value(q_w.reshape([-1, self.channels])) + self.key.weight.set_value(k_w.reshape([-1, self.channels])) + self.value.weight.set_value(v_w.reshape([-1, self.channels])) + + self.query.bias.set_value(q_b.flatten()) + self.key.bias.set_value(k_b.flatten()) + self.value.bias.set_value(v_b.flatten()) + + self.proj_attn.weight.set_value(attn_layer.proj.weight[:, :, 0]) + self.proj_attn.bias.set_value(attn_layer.proj.bias) + + +class SpatialTransformer(nn.Layer): + """ + Transformer block for image-like data. First, project the input (aka embedding) and reshape to b, t, d. Then apply + standard transformer action. Finally, reshape to image + """ + + def __init__(self, in_channels, n_heads, d_head, depth=1, dropout=0.0, context_dim=None): + super().__init__() + self.n_heads = n_heads + self.d_head = d_head + self.in_channels = in_channels + inner_dim = n_heads * d_head + self.norm = nn.GroupNorm(num_groups=32, num_channels=in_channels, epsilon=1e-6) + + self.proj_in = nn.Conv2D(in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + + self.transformer_blocks = nn.LayerList([ + BasicTransformerBlock(inner_dim, n_heads, d_head, dropout=dropout, context_dim=context_dim) + for d in range(depth) + ]) + + self.proj_out = nn.Conv2D(inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, context=None): + # note: if no context is given, cross-attention defaults to self-attention + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + x = self.proj_in(x) + x = x.transpose([0, 2, 3, 1]).reshape([b, h * w, c]) + for block in self.transformer_blocks: + x = block(x, context=context) + x = x.reshape([b, h, w, c]).transpose([0, 3, 1, 2]) + x = self.proj_out(x) + return x + x_in + + def set_weight(self, layer): + self.norm = layer.norm + self.proj_in = layer.proj_in + self.transformer_blocks = layer.transformer_blocks + self.proj_out = layer.proj_out + + +class BasicTransformerBlock(nn.Layer): + + def __init__(self, dim, n_heads, d_head, dropout=0.0, context_dim=None, gated_ff=True, checkpoint=True): + super().__init__() + self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, + dropout=dropout) # is a self-attention + self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff) + self.attn2 = CrossAttention(query_dim=dim, + context_dim=context_dim, + heads=n_heads, + dim_head=d_head, + dropout=dropout) # is self-attn if context is none + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + self.checkpoint = checkpoint + + def forward(self, x, context=None): + x = self.attn1(self.norm1(x)) + x + x = self.attn2(self.norm2(x), context=context) + x + x = self.ff(self.norm3(x)) + x + return x + + +class CrossAttention(nn.Layer): + + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): + super().__init__() + inner_dim = dim_head * heads + context_dim = 
default(context_dim, query_dim) + + self.scale = dim_head**-0.5 + self.heads = heads + + self.to_q = nn.Linear(query_dim, inner_dim, bias_attr=False) + self.to_k = nn.Linear(context_dim, inner_dim, bias_attr=False) + self.to_v = nn.Linear(context_dim, inner_dim, bias_attr=False) + + self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) + + def reshape_heads_to_batch_dim(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size, seq_len, head_size, dim // head_size]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size * head_size, seq_len, dim // head_size]) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape([batch_size // head_size, head_size, seq_len, dim]) + tensor = tensor.transpose([0, 2, 1, 3]).reshape([batch_size // head_size, seq_len, dim * head_size]) + return tensor + + def forward(self, x, context=None, mask=None): + batch_size, sequence_length, dim = x.shape + + h = self.heads + + q = self.to_q(x) + context = default(context, x) + k = self.to_k(context) + v = self.to_v(context) + + q = self.reshape_heads_to_batch_dim(q) + k = self.reshape_heads_to_batch_dim(k) + v = self.reshape_heads_to_batch_dim(v) + + sim = paddle.einsum("b i d, b j d -> b i j", q * self.scale, k) + + if exists(mask): + mask = mask.reshape([batch_size, -1]) + max_neg_value = -paddle.finfo(sim.dtype).max + mask = mask[:, None, :].repeat(h, 1, 1) + sim.masked_fill_(~mask, max_neg_value) + + # attention, what we cannot get enough of + attn = F.softmax(sim, axis=-1) + + out = paddle.einsum("b i j, b j d -> b i d", attn, v) + out = self.reshape_batch_dim_to_heads(out) + return self.to_out(out) + + +class FeedForward(nn.Layer): + + def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0): + super().__init__() + inner_dim = int(dim * mult) + dim_out = default(dim_out, dim) + project_in = nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU()) if not glu else GEGLU(dim, inner_dim) + + self.net = nn.Sequential(project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)) + + def forward(self, x): + return self.net(x) + + +# feedforward +class GEGLU(nn.Layer): + + def __init__(self, dim_in, dim_out): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def forward(self, x): + x, gate = self.proj(x).chunk(2, axis=-1) + return x * F.gelu(gate) + + +# TODO(Patrick) - remove once all weights have been converted -> not needed anymore then +class NIN(nn.Layer): + + def __init__(self, in_dim, num_units, init_scale=0.1): + super().__init__() + self.W = self.create_parameter(shape=[in_dim, num_units], default_initializer=nn.initializer.Constant(0.)) + self.b = self.create_parameter(shape=[ + num_units, + ], + is_bias=True, + default_initializer=nn.initializer.Constant(0.)) + + +def exists(val): + return val is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +# the main attention block that is used for all models +class AttentionBlock(nn.Layer): + """ + An attention block that allows spatial positions to attend to each other. + + Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. 
+ """ + + def __init__( + self, + channels, + num_heads=1, + num_head_channels=None, + num_groups=32, + encoder_channels=None, + overwrite_qkv=False, + overwrite_linear=False, + rescale_output_factor=1.0, + eps=1e-5, + ): + super().__init__() + self.channels = channels + if num_head_channels is None: + self.num_heads = num_heads + else: + assert (channels % num_head_channels == 0 + ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" + self.num_heads = channels // num_head_channels + + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=eps) + self.qkv = nn.Conv1D(channels, channels * 3, 1) + self.n_heads = self.num_heads + self.rescale_output_factor = rescale_output_factor + + if encoder_channels is not None: + self.encoder_kv = nn.Conv1D(encoder_channels, channels * 2, 1) + + self.proj = nn.Conv1D(channels, channels, 1) + + self.overwrite_qkv = overwrite_qkv + self.overwrite_linear = overwrite_linear + + if overwrite_qkv: + in_channels = channels + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.q = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.k = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.v = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + self.proj_out = nn.Conv2D(in_channels, in_channels, kernel_size=1, stride=1, padding=0) + elif self.overwrite_linear: + num_groups = min(channels // 4, 32) + self.norm = nn.GroupNorm(num_channels=channels, num_groups=num_groups, epsilon=1e-6) + self.NIN_0 = NIN(channels, channels) + self.NIN_1 = NIN(channels, channels) + self.NIN_2 = NIN(channels, channels) + self.NIN_3 = NIN(channels, channels) + + self.GroupNorm_0 = nn.GroupNorm(num_groups=num_groups, num_channels=channels, epsilon=1e-6) + else: + self.proj_out = nn.Conv1D(channels, channels, 1) + self.set_weights(self) + + self.is_overwritten = False + + def set_weights(self, layer): + if self.overwrite_qkv: + qkv_weight = paddle.concat([layer.q.weight, layer.k.weight, layer.v.weight], axis=0)[:, :, :, 0] + qkv_bias = paddle.concat([layer.q.bias, layer.k.bias, layer.v.bias], axis=0) + + self.qkv.weight.set_value(qkv_weight) + self.qkv.bias.set_value(qkv_bias) + + proj_out = nn.Conv1D(self.channels, self.channels, 1) + proj_out.weight.set_value(layer.proj_out.weight[:, :, :, 0]) + proj_out.bias.set_value(layer.proj_out.bias) + + self.proj = proj_out + elif self.overwrite_linear: + self.qkv.weight.set_value( + paddle.concat([self.NIN_0.W.t(), self.NIN_1.W.t(), self.NIN_2.W.t()], axis=0)[:, :, None]) + self.qkv.bias.set_value(paddle.concat([self.NIN_0.b, self.NIN_1.b, self.NIN_2.b], axis=0)) + + self.proj.weight.set_value(self.NIN_3.W.t()[:, :, None]) + self.proj.bias.set_value(self.NIN_3.b) + + self.norm.weight.set_value(self.GroupNorm_0.weight) + self.norm.bias.set_value(self.GroupNorm_0.bias) + else: + self.proj.weight.set_value(self.proj_out.weight) + self.proj.bias.set_value(self.proj_out.bias) + + def forward(self, x, encoder_out=None): + if not self.is_overwritten and (self.overwrite_qkv or self.overwrite_linear): + self.set_weights(self) + self.is_overwritten = True + + b, c, *spatial = x.shape + hid_states = self.norm(x).reshape([b, c, -1]) + + qkv = self.qkv(hid_states) + bs, width, length = qkv.shape + assert width % (3 * self.n_heads) == 0 + ch = width // (3 * self.n_heads) + q, k, v = qkv.reshape([bs * self.n_heads, ch * 3, length]).split(ch, axis=1) + + if encoder_out is not None: + 
encoder_kv = self.encoder_kv(encoder_out) + assert encoder_kv.shape[1] == self.n_heads * ch * 2 + ek, ev = encoder_kv.reshape([bs * self.n_heads, ch * 2, -1]).split(ch, axis=1) + k = paddle.concat([ek, k], axis=-1) + v = paddle.concat([ev, v], axis=-1) + + scale = 1 / math.sqrt(math.sqrt(ch)) + weight = paddle.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards + weight = F.softmax(weight.astype("float32"), axis=-1).astype(weight.dtype) + + a = paddle.einsum("bts,bcs->bct", weight, v) + h = a.reshape([bs, -1, length]) + + h = self.proj(h) + h = h.reshape([b, c, *spatial]) + + result = x + h + + result = result / self.rescale_output_factor + + return result diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/embeddings.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/embeddings.py new file mode 100644 index 000000000..3e826193b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/embeddings.py @@ -0,0 +1,116 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def get_timestep_embedding(timesteps, + embedding_dim, + flip_sin_to_cos=False, + downscale_freq_shift=1, + scale=1, + max_period=10000): + """ + This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings. + + :param timesteps: a 1-D Tensor of N indices, one per batch element. + These may be fractional. + :param embedding_dim: the dimension of the output. :param max_period: controls the minimum frequency of the + embeddings. :return: an [N x dim] Tensor of positional embeddings. 
+ """ + assert len(timesteps.shape) == 1, "Timesteps should be a 1d-array" + + half_dim = embedding_dim // 2 + exponent = -math.log(max_period) * paddle.arange(start=0, end=half_dim, dtype="float32") + exponent = exponent / (half_dim - downscale_freq_shift) + + emb = paddle.exp(exponent) + emb = timesteps[:, None].astype("float32") * emb[None, :] + + # scale embeddings + emb = scale * emb + + # concat sine and cosine embeddings + emb = paddle.concat([paddle.sin(emb), paddle.cos(emb)], axis=-1) + + # flip sine and cosine embeddings + if flip_sin_to_cos: + emb = paddle.concat([emb[:, half_dim:], emb[:, :half_dim]], axis=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = paddle.concat(emb, paddle.zeros([emb.shape[0], 1]), axis=-1) + return emb + + +class TimestepEmbedding(nn.Layer): + + def __init__(self, channel, time_embed_dim, act_fn="silu"): + super().__init__() + + self.linear_1 = nn.Linear(channel, time_embed_dim) + self.act = None + if act_fn == "silu": + self.act = nn.Silu() + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim) + + def forward(self, sample): + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + return sample + + +class Timesteps(nn.Layer): + + def __init__(self, num_channels, flip_sin_to_cos, downscale_freq_shift): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + + def forward(self, timesteps): + t_emb = get_timestep_embedding( + timesteps, + self.num_channels, + flip_sin_to_cos=self.flip_sin_to_cos, + downscale_freq_shift=self.downscale_freq_shift, + ) + return t_emb + + +class GaussianFourierProjection(nn.Layer): + """Gaussian Fourier embeddings for noise levels.""" + + def __init__(self, embedding_size=256, scale=1.0): + super().__init__() + self.register_buffer("weight", paddle.randn((embedding_size, )) * scale) + + # to delete later + self.register_buffer("W", paddle.randn((embedding_size, )) * scale) + + self.weight = self.W + + def forward(self, x): + x = paddle.log(x) + x_proj = x[:, None] * self.weight[None, :] * 2 * np.pi + out = paddle.concat([paddle.sin(x_proj), paddle.cos(x_proj)], axis=-1) + return out diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/resnet.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/resnet.py new file mode 100644 index 000000000..944bc11cd --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/resnet.py @@ -0,0 +1,515 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from functools import partial + +import numpy as np +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def pad_new(x, pad, mode="constant", value=0): + new_pad = [] + for _ in range(x.ndim * 2 - len(pad)): + new_pad.append(0) + ndim = list(range(x.ndim - 1, 0, -1)) + axes_start = {} + for i, _pad in enumerate(pad): + if _pad < 0: + new_pad.append(0) + zhengshu, yushu = divmod(i, 2) + if yushu == 0: + axes_start[ndim[zhengshu]] = -_pad + else: + new_pad.append(_pad) + + padded = paddle.nn.functional.pad(x, new_pad, mode=mode, value=value) + padded_shape = paddle.shape(padded) + axes = [] + starts = [] + ends = [] + for k, v in axes_start.items(): + axes.append(k) + starts.append(v) + ends.append(padded_shape[k]) + assert v < padded_shape[k] + + if axes: + return padded.slice(axes=axes, starts=starts, ends=ends) + else: + return padded + + +class Upsample2D(nn.Layer): + """ + An upsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + upsampling occurs in the inner-two dimensions. + """ + + def __init__(self, channels, use_conv=False, use_conv_transpose=False, out_channels=None, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv_transpose: + conv = nn.Conv2DTranspose(channels, self.out_channels, 4, 2, 1) + elif use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, padding=1) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.conv = conv + else: + self.Conv2d_0 = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv_transpose: + return self.conv(x) + + x = F.interpolate(x, scale_factor=2.0, mode="nearest") + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if self.use_conv: + if self.name == "conv": + x = self.conv(x) + else: + x = self.Conv2d_0(x) + + return x + + +class Downsample2D(nn.Layer): + """ + A downsampling layer with an optional convolution. + + :param channels: channels in the inputs and outputs. :param use_conv: a bool determining if a convolution is + applied. :param dims: determines if the signal is 1D, 2D, or 3D. If 3D, then + downsampling occurs in the inner-two dimensions. 
+ """ + + def __init__(self, channels, use_conv=False, out_channels=None, padding=1, name="conv"): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2D(self.channels, self.out_channels, 3, stride=stride, padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2D(kernel_size=stride, stride=stride) + + # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed + if name == "conv": + self.Conv2d_0 = conv + self.conv = conv + elif name == "Conv2d_0": + self.conv = conv + else: + self.conv = conv + + def forward(self, x): + assert x.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + x = pad_new(x, pad, mode="constant", value=0) + + assert x.shape[1] == self.channels + x = self.conv(x) + + return x + + +class FirUpsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.use_conv = use_conv + self.fir_kernel = fir_kernel + self.out_channels = out_channels + + def _upsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `upsample_2d()` followed by `Conv2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, + outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same datatype as + `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Setup filter kernel. + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + + if self.use_conv: + convH = w.shape[2] + convW = w.shape[3] + inC = w.shape[1] + + p = (k.shape[0] - factor) - (convW - 1) + + stride = (factor, factor) + # Determine data dimensions. + stride = [1, 1, factor, factor] + output_shape = ((x.shape[2] - 1) * factor + convH, (x.shape[3] - 1) * factor + convW) + output_padding = ( + output_shape[0] - (x.shape[2] - 1) * stride[0] - convH, + output_shape[1] - (x.shape[3] - 1) * stride[1] - convW, + ) + assert output_padding[0] >= 0 and output_padding[1] >= 0 + inC = w.shape[1] + num_groups = x.shape[1] // inC + + # Transpose weights. 
+ w = paddle.reshape(w, (num_groups, -1, inC, convH, convW)) + w = w[..., ::-1, ::-1].transpose([0, 2, 1, 3, 4]) + w = paddle.reshape(w, (num_groups * inC, -1, convH, convW)) + + x = F.conv2d_transpose(x, w, stride=stride, output_padding=output_padding, padding=0) + + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2 + factor - 1, p // 2 + 1)) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + h = self._upsample_2d(x, self.Conv2d_0.weight, k=self.fir_kernel) + h = h + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + h = self._upsample_2d(x, k=self.fir_kernel, factor=2) + + return h + + +class FirDownsample2D(nn.Layer): + + def __init__(self, channels=None, out_channels=None, use_conv=False, fir_kernel=(1, 3, 3, 1)): + super().__init__() + out_channels = out_channels if out_channels else channels + if use_conv: + self.Conv2d_0 = nn.Conv2D(channels, out_channels, kernel_size=3, stride=1, padding=1) + self.fir_kernel = fir_kernel + self.use_conv = use_conv + self.out_channels = out_channels + + def _downsample_2d(self, x, w=None, k=None, factor=2, gain=1): + """Fused `Conv2d()` followed by `downsample_2d()`. + + Args: + Padding is performed only once at the beginning, not between the operations. The fused op is considerably more + efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary: + order. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. w: Weight tensor of the shape `[filterH, + filterW, inChannels, outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // + numGroups`. k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * + factor`, which corresponds to average pooling. factor: Integer downsampling factor (default: 2). gain: + Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or `[N, H // factor, W // factor, C]`, and same + datatype as `x`. 
+ """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + # setup kernel + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + + if self.use_conv: + _, _, convH, convW = w.shape + p = (k.shape[0] - factor) + (convW - 1) + s = [factor, factor] + x = upfirdn2d_native(x, paddle.to_tensor(k), pad=((p + 1) // 2, p // 2)) + x = F.conv2d(x, w, stride=s, padding=0) + else: + p = k.shape[0] - factor + x = upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + return x + + def forward(self, x): + if self.use_conv: + x = self._downsample_2d(x, w=self.Conv2d_0.weight, k=self.fir_kernel) + x = x + self.Conv2d_0.bias.reshape([1, -1, 1, 1]) + else: + x = self._downsample_2d(x, k=self.fir_kernel, factor=2) + + return x + + +class ResnetBlock(nn.Layer): + + def __init__( + self, + *, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity="swish", + time_embedding_norm="default", + kernel=None, + output_scale_factor=1.0, + use_nin_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = nn.GroupNorm(num_groups=groups, num_channels=in_channels, epsilon=eps) + + self.conv1 = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = nn.GroupNorm(num_groups=groups_out, num_channels=out_channels, epsilon=eps) + self.dropout = nn.Dropout(dropout) + self.conv2 = nn.Conv2D(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == "swish": + self.nonlinearity = lambda x: F.silu(x) + elif non_linearity == "mish": + self.nonlinearity = Mish() + elif non_linearity == "silu": + self.nonlinearity = nn.Silu() + + self.upsample = self.downsample = None + if self.up: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.upsample = lambda x: upsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.upsample = partial(F.interpolate, scale_factor=2.0, mode="nearest") + else: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + if kernel == "fir": + fir_kernel = (1, 3, 3, 1) + self.downsample = lambda x: downsample_2d(x, k=fir_kernel) + elif kernel == "sde_vp": + self.downsample = partial(F.avg_pool2d, kernel_size=2, stride=2) + else: + self.downsample = Downsample2D(in_channels, use_conv=False, padding=1, name="op") + + self.use_nin_shortcut = self.in_channels != self.out_channels if use_nin_shortcut is None else use_nin_shortcut + + self.conv_shortcut = None + if self.use_nin_shortcut: + self.conv_shortcut = nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, x, temb, hey=False): + h = x + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm1(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + if self.upsample is not None: + x = 
self.upsample(x) + h = self.upsample(h) + elif self.downsample is not None: + x = self.downsample(x) + h = self.downsample(h) + + h = self.conv1(h) + + if temb is not None: + temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None] + h = h + temb + + # make sure hidden states is in float32 + # when running in half-precision + h = self.norm2(h.astype("float32")).astype(h.dtype) + h = self.nonlinearity(h) + + h = self.dropout(h) + h = self.conv2(h) + + if self.conv_shortcut is not None: + x = self.conv_shortcut(x) + + out = (x + h) / self.output_scale_factor + + return out + + +class Mish(nn.Layer): + + def forward(self, x): + return x * F.tanh(F.softplus(x)) + + +def upsample_2d(x, k=None, factor=2, gain=1): + r"""Upsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and upsamples each image with the given + filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the specified + `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its shape is a: + multiple of the upsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. + factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` + """ + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * (gain * (factor**2)) + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), up=factor, pad=((p + 1) // 2 + factor - 1, p // 2)) + + +def downsample_2d(x, k=None, factor=2, gain=1): + r"""Downsample2D a batch of 2D images with the given filter. + + Args: + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` and downsamples each image with the + given filter. The filter is normalized so that if the input pixels are constant, they will be scaled by the + specified `gain`. Pixels outside the image are assumed to be zero, and the filter is padded with zeros so that its + shape is a multiple of the downsampling factor. + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, + C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` + (separable). The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). 
+ + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + + k = k * gain + p = k.shape[0] - factor + return upfirdn2d_native(x, paddle.to_tensor(k), down=factor, pad=((p + 1) // 2, p // 2)) + + +def upfirdn2d_native(input, kernel, up=1, down=1, pad=(0, 0)): + up_x = up_y = up + down_x = down_y = down + pad_x0 = pad_y0 = pad[0] + pad_x1 = pad_y1 = pad[1] + + _, channel, in_h, in_w = input.shape + input = input.reshape([-1, in_h, in_w, 1]) + + _, in_h, in_w, minor = input.shape + kernel_h, kernel_w = kernel.shape + + out = input.reshape([-1, in_h, 1, in_w, 1, minor]) + # TODO + out = pad_new(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1]) + out = out.reshape([-1, in_h * up_y, in_w * up_x, minor]) + + out = pad_new(out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]) + out = out[:, max(-pad_y0, 0):out.shape[1] - max(-pad_y1, 0), max(-pad_x0, 0):out.shape[2] - max(-pad_x1, 0), :, ] + + out = out.transpose([0, 3, 1, 2]) + out = out.reshape([-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]) + w = paddle.flip(kernel, [0, 1]).reshape([1, 1, kernel_h, kernel_w]) + out = F.conv2d(out, w) + out = out.reshape( + [-1, minor, in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1, in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1]) + out = out.transpose([0, 2, 3, 1]) + out = out[:, ::down_y, ::down_x, :] + + out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1 + out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1 + + return out.reshape([-1, channel, out_h, out_w]) diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d.py new file mode 100644 index 000000000..11316a819 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
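The building blocks defined in resnet.py above can be sanity-checked in isolation. A minimal sketch, assuming the file is importable on its own as `resnet` and using illustrative shapes:

import paddle

# Assumed import path for illustration only; the classes are the ones defined in resnet.py above.
from resnet import Upsample2D, Downsample2D, ResnetBlock

x = paddle.randn([1, 64, 32, 32])
temb = paddle.randn([1, 512])

up = Upsample2D(64, use_conv=True)      # nearest-neighbour interpolate followed by a 3x3 conv
down = Downsample2D(64, use_conv=True)  # strided 3x3 conv
block = ResnetBlock(in_channels=64, out_channels=128, temb_channels=512)

print(up(x).shape)           # [1, 64, 64, 64]
print(down(x).shape)         # [1, 64, 16, 16]
print(block(x, temb).shape)  # [1, 128, 32, 32]; the time embedding is added after the first conv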
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import GaussianFourierProjection +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class UNet2DModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=None, + in_channels=3, + out_channels=3, + center_input_sample=False, + time_embedding_type="positional", + freq_shift=0, + flip_sin_to_cos=True, + down_block_types=("DownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D", "AttnDownBlock2D"), + up_block_types=("AttnUpBlock2D", "AttnUpBlock2D", "AttnUpBlock2D", "UpBlock2D"), + block_out_channels=(224, 448, 672, 896), + layers_per_block=2, + mid_block_scale_factor=1, + downsample_padding=1, + act_fn="silu", + attention_head_dim=8, + norm_num_groups=32, + norm_eps=1e-5, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + if time_embedding_type == "fourier": + self.time_proj = GaussianFourierProjection(embedding_size=block_out_channels[0], scale=16) + timestep_input_dim = 2 * block_out_channels[0] + elif time_embedding_type == "positional": + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + attn_num_head_channels=attention_head_dim, + ) + 
self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = norm_num_groups if norm_num_groups is not None else min(block_out_channels[0] // 4, 32) + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=num_groups_out, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, sample: paddle.Tensor, timestep: Union[paddle.Tensor, float, int]) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + skip_sample = sample + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, "skip_conv"): + sample, res_samples, skip_sample = downsample_block(hidden_states=sample, + temb=emb, + skip_sample=skip_sample) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb) + + # 5. up + skip_sample = None + for upsample_block in self.up_blocks: + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "skip_conv"): + sample, skip_sample = upsample_block(sample, res_samples, emb, skip_sample) + else: + sample = upsample_block(sample, res_samples, emb) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + if skip_sample is not None: + sample += skip_sample + + if self.config.time_embedding_type == "fourier": + timesteps = timesteps.reshape((sample.shape[0], *([1] * len(sample.shape[1:])))) + sample = sample / timesteps + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d_condition.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d_condition.py new file mode 100644 index 000000000..897491b2f --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_2d_condition.py @@ -0,0 +1,206 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
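A minimal sketch of driving the unconditional UNet2DModel defined in unet_2d.py above. The tiny configuration and the import path are illustrative assumptions; the module itself ships far larger configurations:

import paddle

# Assumed import path; in the repo the class sits inside the bundled diffusers package.
from diffusers.models.unet_2d import UNet2DModel

unet = UNet2DModel(
    sample_size=32,
    in_channels=3,
    out_channels=3,
    layers_per_block=1,
    block_out_channels=(32, 64),
    down_block_types=("DownBlock2D", "AttnDownBlock2D"),
    up_block_types=("AttnUpBlock2D", "UpBlock2D"),
)

sample = paddle.randn([1, 3, 32, 32])               # noisy image batch
timestep = paddle.to_tensor([999], dtype="int64")   # diffusion step, broadcast to the batch
out = unet(sample, timestep)["sample"]              # prediction with the same shape as the input
print(out.shape)                                    # [1, 3, 32, 32]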
+from typing import Dict +from typing import Union + +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .embeddings import TimestepEmbedding +from .embeddings import Timesteps +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2DCrossAttn + + +class UNet2DConditionModel(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8, + ): + super().__init__() + + self.sample_size = sample_size + time_embed_dim = block_out_channels[0] * 4 + + # input + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, padding=(1, 1)) + + # time + self.time_proj = Timesteps(block_out_channels[0], flip_sin_to_cos, freq_shift) + timestep_input_dim = block_out_channels[0] + + self.time_embedding = TimestepEmbedding(timestep_input_dim, time_embed_dim) + + self.down_blocks = nn.LayerList([]) + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + temb_channels=time_embed_dim, + add_downsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + downsample_padding=downsample_padding, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=block_out_channels[-1], + temb_channels=time_embed_dim, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + output_scale_factor=mid_block_scale_factor, + resnet_time_scale_shift="default", + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + resnet_groups=norm_num_groups, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + input_channel = reversed_block_out_channels[min(i + 1, len(block_out_channels) - 1)] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=time_embed_dim, + add_upsample=not is_final_block, + resnet_eps=norm_eps, + resnet_act_fn=act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attention_head_dim, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = 
nn.GroupNorm(num_channels=block_out_channels[0], + num_groups=norm_num_groups, + epsilon=norm_eps) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward( + self, + sample: paddle.Tensor, + timestep: Union[paddle.Tensor, float, int], + encoder_hidden_states: paddle.Tensor, + ) -> Dict[str, paddle.Tensor]: + + # 0. center input if necessary + if self.config.center_input_sample: + sample = 2 * sample - 1.0 + + # 1. time + timesteps = timestep + if not paddle.is_tensor(timesteps): + timesteps = paddle.to_tensor([timesteps], dtype="int64") + elif paddle.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None] + + # broadcast to batch dimension + timesteps = paddle.broadcast_to(timesteps, [sample.shape[0]]) + + t_emb = self.time_proj(timesteps) + emb = self.time_embedding(t_emb) + + # 2. pre-process + sample = self.conv_in(sample) + + # 3. down + down_block_res_samples = (sample, ) + for downsample_block in self.down_blocks: + + if hasattr(downsample_block, "attentions") and downsample_block.attentions is not None: + sample, res_samples = downsample_block(hidden_states=sample, + temb=emb, + encoder_hidden_states=encoder_hidden_states) + else: + sample, res_samples = downsample_block(hidden_states=sample, temb=emb) + + down_block_res_samples += res_samples + + # 4. mid + sample = self.mid_block(sample, emb, encoder_hidden_states=encoder_hidden_states) + + # 5. up + for upsample_block in self.up_blocks: + + res_samples = down_block_res_samples[-len(upsample_block.resnets):] + down_block_res_samples = down_block_res_samples[:-len(upsample_block.resnets)] + + if hasattr(upsample_block, "attentions") and upsample_block.attentions is not None: + sample = upsample_block( + hidden_states=sample, + temb=emb, + res_hidden_states_tuple=res_samples, + encoder_hidden_states=encoder_hidden_states, + ) + else: + sample = upsample_block(hidden_states=sample, temb=emb, res_hidden_states_tuple=res_samples) + + # 6. post-process + # make sure hidden states is in float32 + # when running in half-precision + sample = self.conv_norm_out(sample.astype("float32")).astype(sample.dtype) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + output = {"sample": sample} + + return output diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_blocks.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_blocks.py new file mode 100644 index 000000000..684a2a43d --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/unet_blocks.py @@ -0,0 +1,1428 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
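A minimal sketch of driving the text-conditioned UNet2DConditionModel defined in unet_2d_condition.py above. The reduced configuration, the import path, and the random stand-in for CLIP text features are illustrative assumptions; the pipeline itself uses the full (320, 640, 1280, 1280) configuration with [batch, 77, 768] text embeddings:

import paddle

# Assumed import path; in the repo the class sits inside the bundled diffusers package.
from diffusers.models.unet_2d_condition import UNet2DConditionModel

unet = UNet2DConditionModel(
    sample_size=32,
    layers_per_block=1,
    block_out_channels=(32, 64),
    down_block_types=("CrossAttnDownBlock2D", "DownBlock2D"),
    up_block_types=("UpBlock2D", "CrossAttnUpBlock2D"),
    cross_attention_dim=768,
    attention_head_dim=8,
)

latents = paddle.randn([1, 4, 32, 32])             # VAE latent being denoised
timestep = paddle.to_tensor([999], dtype="int64")
text_emb = paddle.randn([1, 77, 768])              # stand-in for CLIP text encoder hidden states
noise_pred = unet(latents, timestep, text_emb)["sample"]
print(noise_pred.shape)                            # [1, 4, 32, 32]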
+import numpy as np +import paddle +import paddle.nn as nn + +from .attention import AttentionBlockNew +from .attention import SpatialTransformer +from .resnet import Downsample2D +from .resnet import FirDownsample2D +from .resnet import FirUpsample2D +from .resnet import ResnetBlock +from .resnet import Upsample2D + + +def get_down_block( + down_block_type, + num_layers, + in_channels, + out_channels, + temb_channels, + add_downsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, + downsample_padding=None, +): + down_block_type = down_block_type[7:] if down_block_type.startswith("UNetRes") else down_block_type + if down_block_type == "DownBlock2D": + return DownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnDownBlock2D": + return AttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "CrossAttnDownBlock2D": + if cross_attention_dim is None: + raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "SkipDownBlock2D": + return SkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + elif down_block_type == "AttnSkipDownBlock2D": + return AttnSkipDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + attn_num_head_channels=attn_num_head_channels, + ) + elif down_block_type == "DownEncoderBlock2D": + return DownEncoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + downsample_padding=downsample_padding, + ) + + +def get_up_block( + up_block_type, + num_layers, + in_channels, + out_channels, + prev_output_channel, + temb_channels, + add_upsample, + resnet_eps, + resnet_act_fn, + attn_num_head_channels, + cross_attention_dim=None, +): + up_block_type = up_block_type[7:] if up_block_type.startswith("UNetRes") else up_block_type + if up_block_type == "UpBlock2D": + return UpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "CrossAttnUpBlock2D": + if cross_attention_dim is None: 
+ raise ValueError("cross_attention_dim must be specified for CrossAttnUpBlock2D") + return CrossAttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "AttnUpBlock2D": + return AttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "SkipUpBlock2D": + return SkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + elif up_block_type == "AttnSkipUpBlock2D": + return AttnSkipUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + attn_num_head_channels=attn_num_head_channels, + ) + elif up_block_type == "UpDecoderBlock2D": + return UpDecoderBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + ) + raise ValueError(f"{up_block_type} does not exist.") + + +class UNetMidBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + AttentionBlockNew( + in_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in 
zip(self.attentions, self.resnets[1:]): + if self.attention_type == "default": + hidden_states = attn(hidden_states) + else: + hidden_states = attn(hidden_states, encoder_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class UNetMidBlock2DCrossAttn(nn.Layer): + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + cross_attention_dim=1280, + **kwargs, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min(in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + SpatialTransformer( + in_channels, + attn_num_head_channels, + in_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states, encoder_hidden_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class AttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, 
+ out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class CrossAttnDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + 
Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states, temb=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnDownEncoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_downsample: + self.downsamplers = nn.LayerList([ + Downsample2D(in_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name="op") + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.downsamplers is not None: + for downsampler 
in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class AttnSkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + downsample_padding=1, + add_downsample=True, + ): + super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class SkipDownBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + self.resnets.append( + ResnetBlock( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(in_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, 
+ )) + + if add_downsample: + self.resnet_down = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + down=True, + kernel="fir", + ) + self.downsamplers = nn.LayerList([FirDownsample2D(in_channels, out_channels=out_channels)]) + self.skip_conv = nn.Conv2D(3, out_channels, kernel_size=(1, 1), stride=(1, 1)) + else: + self.resnet_down = None + self.downsamplers = None + self.skip_conv = None + + def forward(self, hidden_states, temb=None, skip_sample=None): + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + output_states += (hidden_states, ) + + if self.downsamplers is not None: + hidden_states = self.resnet_down(hidden_states, temb) + for downsampler in self.downsamplers: + skip_sample = downsampler(skip_sample) + + hidden_states = self.skip_conv(skip_sample) + hidden_states + + output_states += (hidden_states, ) + + return hidden_states, output_states, skip_sample + + +class AttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attention_type="default", + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class CrossAttnUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + 
resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type="default", + output_scale_factor=1.0, + downsample_padding=1, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + SpatialTransformer( + out_channels, + attn_num_head_channels, + out_channels // attn_num_head_channels, + depth=1, + context_dim=cross_attention_dim, + )) + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, encoder_hidden_states=None): + for resnet, attn in zip(self.resnets, self.attentions): + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn(hidden_states, context=encoder_hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None): + for resnet in self.resnets: + + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if 
self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class UpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnUpDecoderBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + attentions = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + num_groups=resnet_groups, + )) + + self.attentions = nn.LayerList(attentions) + self.resnets = nn.LayerList(resnets) + + if add_upsample: + self.upsamplers = nn.LayerList([Upsample2D(out_channels, use_conv=True, out_channels=out_channels)]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb=None) + hidden_states = attn(hidden_states) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class AttnSkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type="default", + output_scale_factor=np.sqrt(2.0), + upsample_padding=1, + add_upsample=True, + ): + 
super().__init__() + self.attentions = nn.LayerList([]) + self.resnets = nn.LayerList([]) + + self.attention_type = attention_type + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(resnet_in_channels + res_skip_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions.append( + AttentionBlockNew( + out_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + hidden_states = self.attentions[0](hidden_states) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample + + +class SkipUpBlock2D(nn.Layer): + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = "default", + resnet_act_fn: str = "swish", + resnet_pre_norm: bool = True, + output_scale_factor=np.sqrt(2.0), + add_upsample=True, + upsample_padding=1, + ): + super().__init__() + self.resnets = nn.LayerList([]) + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - 1) else out_channels + resnet_in_channels = prev_output_channel if i == 0 else out_channels + + self.resnets.append( + ResnetBlock( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + 
groups=min((resnet_in_channels + res_skip_channels) // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.upsampler = FirUpsample2D(in_channels, out_channels=out_channels) + if add_upsample: + self.resnet_up = ResnetBlock( + in_channels=out_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=min(out_channels // 4, 32), + groups_out=min(out_channels // 4, 32), + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + use_nin_shortcut=True, + up=True, + kernel="fir", + ) + self.skip_conv = nn.Conv2D(out_channels, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) + self.skip_norm = nn.GroupNorm(num_groups=min(out_channels // 4, 32), + num_channels=out_channels, + eps=resnet_eps, + affine=True) + self.act = nn.SiLU() + else: + self.resnet_up = None + self.skip_conv = None + self.skip_norm = None + self.act = None + + def forward(self, hidden_states, res_hidden_states_tuple, temb=None, skip_sample=None): + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1) + + hidden_states = resnet(hidden_states, temb) + + if skip_sample is not None: + skip_sample = self.upsampler(skip_sample) + else: + skip_sample = 0 + + if self.resnet_up is not None: + skip_sample_states = self.skip_norm(hidden_states) + skip_sample_states = self.act(skip_sample_states) + skip_sample_states = self.skip_conv(skip_sample_states) + + skip_sample = skip_sample + skip_sample_states + + hidden_states = self.resnet_up(hidden_states, temb) + + return hidden_states, skip_sample diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/vae.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/vae.py new file mode 100644 index 000000000..59e35b0fb --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/models/vae.py @@ -0,0 +1,465 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
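A note on how the blocks defined in unet_blocks.py above are meant to compose, before the vae.py code begins: every down block returns all of its intermediate hidden states, the UNet accumulates them into one tuple, and each up block pops entries off the end of that tuple as skip connections. The sketch below is an illustration only (plain Python, with strings standing in for feature maps); it is not code from this patch.

```python
# Schematic of the skip-connection bookkeeping used by the blocks above.
down_block_res_samples = ("conv_in",)                      # stem convolution output

# a non-final down block records two resnet outputs plus its downsampler output
down_block_res_samples += ("down0_res0", "down0_res1", "down0_downsample")
# the final down block has no downsampler, so only its resnet outputs are recorded
down_block_res_samples += ("down1_res0", "down1_res1")

# each up block runs one more resnet than the down blocks and pops one skip per
# resnet, newest first, concatenating it with its hidden state along the channel
# axis (compare UpBlock2D.forward above)
for num_resnets in (3, 3):
    res_samples = down_block_res_samples[-num_resnets:]
    down_block_res_samples = down_block_res_samples[:-num_resnets]
    print(res_samples)

assert down_block_res_samples == ()    # every skip is consumed exactly once
```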
+import numpy as np +import paddle +import paddle.nn as nn + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .unet_blocks import get_down_block +from .unet_blocks import get_up_block +from .unet_blocks import UNetMidBlock2D + + +class Encoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + double_z=True, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[0], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.down_blocks = nn.LayerList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = get_down_block( + down_block_type, + num_layers=self.layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=not is_final_block, + resnet_eps=1e-6, + downsample_padding=0, + resnet_act_fn=act_fn, + attn_num_head_channels=None, + temb_channels=None, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[-1], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = nn.Conv2D(block_out_channels[-1], conv_out_channels, 3, padding=1) + + def forward(self, x): + sample = x + sample = self.conv_in(sample) + + # down + for down_block in self.down_blocks: + sample = down_block(sample) + + # middle + sample = self.mid_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class Decoder(nn.Layer): + + def __init__( + self, + in_channels=3, + out_channels=3, + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=2, + act_fn="silu", + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2D(in_channels, block_out_channels[-1], kernel_size=3, stride=1, padding=1) + + self.mid_block = None + self.up_blocks = nn.LayerList([]) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift="default", + attn_num_head_channels=None, + resnet_groups=32, + temb_channels=None, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = get_up_block( + up_block_type, + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + prev_output_channel=None, + add_upsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + 
attn_num_head_channels=None, + temb_channels=None, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + num_groups_out = 32 + self.conv_norm_out = nn.GroupNorm(num_channels=block_out_channels[0], num_groups=num_groups_out, epsilon=1e-6) + self.conv_act = nn.Silu() + self.conv_out = nn.Conv2D(block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, z): + sample = z + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample) + + # up + for up_block in self.up_blocks: + sample = up_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class VectorQuantizer(nn.Layer): + """ + Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly avoids costly matrix + multiplications and allows for post-hoc remapping of indices. + """ + + # NOTE: due to a bug the beta term was applied to the wrong term. for + # backwards compatibility we use the buggy version by default, but you can + # specify legacy=False to fix it. + def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random", sane_index_shape=False, legacy=True): + super().__init__() + self.n_e = n_e + self.e_dim = e_dim + self.beta = beta + self.legacy = legacy + + self.embedding = nn.Embedding(self.n_e, self.e_dim) + self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) + + self.remap = remap + if self.remap is not None: + self.register_buffer("used", paddle.to_tensor(np.load(self.remap))) + self.re_embed = self.used.shape[0] + self.unknown_index = unknown_index # "random" or "extra" or integer + if self.unknown_index == "extra": + self.unknown_index = self.re_embed + self.re_embed = self.re_embed + 1 + print(f"Remapping {self.n_e} indices to {self.re_embed} indices. 
" + f"Using {self.unknown_index} for unknown indices.") + else: + self.re_embed = n_e + + self.sane_index_shape = sane_index_shape + + def remap_to_used(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + match = (inds[:, :, None] == used[None, None, ...]).astype("int64") + new = match.argmax(-1) + unknown = match.sum(2) < 1 + if self.unknown_index == "random": + new[unknown] = paddle.randint(0, self.re_embed, shape=new[unknown].shape) + else: + new[unknown] = self.unknown_index + return new.reshape(ishape) + + def unmap_to_all(self, inds): + ishape = inds.shape + assert len(ishape) > 1 + inds = inds.reshape([ishape[0], -1]) + used = self.used + if self.re_embed > self.used.shape[0]: # extra token + inds[inds >= self.used.shape[0]] = 0 # simply set to zero + back = paddle.gather(used[None, :][inds.shape[0] * [0], :], inds, axis=1) + return back.reshape(ishape) + + def forward(self, z): + # reshape z -> (batch, height, width, channel) and flatten + z = z.transpose([0, 2, 3, 1]) + z_flattened = z.reshape([-1, self.e_dim]) + # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z + + d = (paddle.sum(z_flattened**2, axis=1, keepdim=True) + paddle.sum(self.embedding.weight**2, axis=1) - + 2 * paddle.einsum("bd,dn->bn", z_flattened, self.embedding.weight.t())) + + min_encoding_indices = paddle.argmin(d, axis=1) + z_q = self.embedding(min_encoding_indices).reshape(z.shape) + perplexity = None + min_encodings = None + + # compute loss for embedding + if not self.legacy: + loss = self.beta * paddle.mean((z_q.detach() - z)**2) + paddle.mean((z_q - z.detach())**2) + else: + loss = paddle.mean((z_q.detach() - z)**2) + self.beta * paddle.mean((z_q - z.detach())**2) + + # preserve gradients + z_q = z + (z_q - z).detach() + + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + if self.remap is not None: + min_encoding_indices = min_encoding_indices.reshape([z.shape[0], -1]) # add batch axis + min_encoding_indices = self.remap_to_used(min_encoding_indices) + min_encoding_indices = min_encoding_indices.reshape([-1, 1]) # flatten + + if self.sane_index_shape: + min_encoding_indices = min_encoding_indices.reshape([z_q.shape[0], z_q.shape[2], z_q.shape[3]]) + + return z_q, loss, (perplexity, min_encodings, min_encoding_indices) + + def get_codebook_entry(self, indices, shape): + # shape specifying (batch, height, width, channel) + if self.remap is not None: + indices = indices.reshape([shape[0], -1]) # add batch axis + indices = self.unmap_to_all(indices) + indices = indices.flatten() # flatten again + + # get quantized latent vectors + z_q = self.embedding(indices) + + if shape is not None: + z_q = z_q.reshape(shape) + # reshape back to match original input shape + z_q = z_q.transpose([0, 3, 1, 2]) + + return z_q + + +class DiagonalGaussianDistribution(object): + + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = paddle.chunk(parameters, 2, axis=1) + self.logvar = paddle.clip(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = paddle.exp(0.5 * self.logvar) + self.var = paddle.exp(self.logvar) + if self.deterministic: + self.var = self.std = paddle.zeros_like(self.mean) + + def sample(self): + x = self.mean + self.std * paddle.randn(self.mean.shape) + return x + + def kl(self, other=None): + if self.deterministic: + return paddle.to_tensor([0.0]) + else: + if other is None: + return 0.5 * 
paddle.sum(paddle.pow(self.mean, 2) + self.var - 1.0 - self.logvar, axis=[1, 2, 3]) + else: + return 0.5 * paddle.sum( + paddle.pow(self.mean - other.mean, 2) / other.var + self.var / other.var - 1.0 - self.logvar + + other.logvar, + axis=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + if self.deterministic: + return paddle.to_tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * paddle.sum(logtwopi + self.logvar + paddle.pow(sample - self.mean, 2) / self.var, axis=dims) + + def mode(self): + return self.mean + + +class VQModel(ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", ), + up_block_types=("UpDecoderBlock2D", ), + block_out_channels=(64, ), + layers_per_block=1, + act_fn="silu", + latent_channels=3, + sample_size=32, + num_vq_embeddings=256, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=False, + ) + + self.quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + self.quantize = VectorQuantizer(num_vq_embeddings, + latent_channels, + beta=0.25, + remap=None, + sane_index_shape=False) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + def encode(self, x): + h = self.encoder(x) + h = self.quant_conv(h) + return h + + def decode(self, h, force_not_quantize=False): + # also go through quantization layer + if not force_not_quantize: + quant, emb_loss, info = self.quantize(h) + else: + quant = h + quant = self.post_quant_conv(quant) + dec = self.decoder(quant) + return dec + + def forward(self, sample): + x = sample + h = self.encode(x) + dec = self.decode(h) + return dec + + +class AutoencoderKL(nn.Layer, ConfigMixin): + + @register_to_config + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512, + ): + super().__init__() + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + double_z=True, + ) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + ) + + self.quant_conv = nn.Conv2D(2 * latent_channels, 2 * latent_channels, 1) + self.post_quant_conv = nn.Conv2D(latent_channels, latent_channels, 1) + + def encode(self, x): + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + return posterior + + def decode(self, z): + z = self.post_quant_conv(z) + dec = self.decoder(z) + return dec + + def forward(self, 
sample, sample_posterior=False): + x = sample + posterior = self.encode(x) + if sample_posterior: + z = posterior.sample() + else: + z = posterior.mode() + dec = self.decode(z) + return dec diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/README.md b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/README.md new file mode 100644 index 000000000..40f50f232 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/README.md @@ -0,0 +1,18 @@ +# Schedulers + +- Schedulers are the algorithms to use diffusion models in inference as well as for training. They include the noise schedules and define algorithm-specific diffusion steps. +- Schedulers can be used interchangeably between diffusion models in inference to find the preferred trade-off between speed and generation quality. +- Schedulers are available in numpy, but can easily be transformed into PyTorch. + +## API + +- Schedulers should provide one or more `def step(...)` functions that should be called iteratively to unroll the diffusion loop during +the forward pass. +- Schedulers should be framework-agnostic, but provide a simple functionality to convert the scheduler into a specific framework, such as PyTorch +with a `set_format(...)` method. + +## Examples + +- The DDPM scheduler was proposed in [Denoising Diffusion Probabilistic Models](https://arxiv.org/abs/2006.11239) and can be found in [scheduling_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py). An example of how to use this scheduler can be found in [pipeline_ddpm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddpm.py). +- The DDIM scheduler was proposed in [Denoising Diffusion Implicit Models](https://arxiv.org/abs/2010.02502) and can be found in [scheduling_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddim.py). An example of how to use this scheduler can be found in [pipeline_ddim.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_ddim.py). +- The PNDM scheduler was proposed in [Pseudo Numerical Methods for Diffusion Models on Manifolds](https://arxiv.org/abs/2202.09778) and can be found in [scheduling_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_pndm.py). An example of how to use this scheduler can be found in [pipeline_pndm.py](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py). diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/__init__.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/__init__.py new file mode 100644 index 000000000..cebc3e618 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/__init__.py @@ -0,0 +1,24 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
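To make the contract described in the scheduler README above concrete, here is a minimal, hypothetical sketch of the inference loop it implies. The `unet` callable, the latent shape, and the top-level `diffusers` import path are placeholders for illustration; they are not part of this patch.

```python
import paddle

from diffusers.schedulers import DDIMScheduler  # assumed import path


def unet(sample, t):
    # stand-in for a trained noise-prediction model e_theta(x_t, t)
    return paddle.zeros_like(sample)


scheduler = DDIMScheduler(beta_schedule="scaled_linear")
scheduler.set_timesteps(num_inference_steps=50)

sample = paddle.randn([1, 4, 64, 64])     # start from pure noise
for t in scheduler.timesteps:
    model_output = unet(sample, t)
    # step() returns a dict; "prev_sample" is the denoised sample x_{t-1}
    sample = scheduler.step(model_output, int(t), sample)["prev_sample"]
```

Swapping in another scheduler from this package would only change the construction line, which is the interchangeability the README refers to.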
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .scheduling_ddim import DDIMScheduler +from .scheduling_ddpm import DDPMScheduler +from .scheduling_karras_ve import KarrasVeScheduler +from .scheduling_lms_discrete import LMSDiscreteScheduler +from .scheduling_pndm import PNDMScheduler +from .scheduling_sde_ve import ScoreSdeVeScheduler +from .scheduling_sde_vp import ScoreSdeVpScheduler +from .scheduling_utils import SchedulerMixin diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddim.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddim.py new file mode 100644 index 000000000..ebe362d99 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddim.py @@ -0,0 +1,182 @@ +# Copyright 2022 Stanford University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pypaddle_diffusion +# and https://github.com/hojonathanho/diffusion +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
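+
+    Concretely, this helper fixes alpha_bar(t) = cos((t + 0.008) / 1.008 * pi / 2)**2
+    (the cosine schedule of "Improved Denoising Diffusion Probabilistic Models") and
+    derives each beta as 1 - alpha_bar(t_{i+1}) / alpha_bar(t_i) over an evenly spaced
+    grid, capping the result at max_beta.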
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDIMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + clip_sample=True, + set_alpha_to_one=True, + tensor_format="pd", + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + # At every step in ddim, we are looking into the previous alphas_cumprod + # For the final step, there is no previous alphas_cumprod because we are already at 0 + # `set_alpha_to_one` decides whether we set this paratemer simply to one or + # whether we use the final alpha of the "non-previous" one. + self.final_alpha_cumprod = np.array(1.0) if set_alpha_to_one else self.alphas_cumprod[0] + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def _get_variance(self, timestep, prev_timestep): + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) + + return variance + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.timesteps += offset + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + eta: float = 0.0, + use_clipped_model_output: bool = False, + generator=None, + ): + # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf + # Ideally, read DDIM paper in-detail understanding + + # Notation ( -> + # - pred_noise_t -> e_theta(x_t, t) + # - pred_original_sample -> f_theta(x_t, t) or x_0 + # - std_dev_t -> sigma_t + # - eta -> η + # - pred_sample_direction -> "direction pointingc to x_t" + # - pred_prev_sample -> "x_t-1" + + # 1. get previous step value (=t-1) + prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps + + # 2. 
compute alphas, betas + alpha_prod_t = self.alphas_cumprod[timestep] + alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod + beta_prod_t = 1 - alpha_prod_t + + # 3. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + + # 4. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 5. compute variance: "sigma_t(η)" -> see formula (16) + # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1) + variance = self._get_variance(timestep, prev_timestep) + std_dev_t = eta * variance**(0.5) + + if use_clipped_model_output: + # the model_output is always re-derived from the clipped x_0 in Glide + model_output = (sample - alpha_prod_t**(0.5) * pred_original_sample) / beta_prod_t**(0.5) + + # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2)**(0.5) * model_output + + # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf + prev_sample = alpha_prod_t_prev**(0.5) * pred_original_sample + pred_sample_direction + + if eta > 0: + noise = paddle.randn(model_output.shape) + variance = self._get_variance(timestep, prev_timestep)**(0.5) * eta * noise + + if not paddle.is_tensor(model_output): + variance = variance.numpy() + + prev_sample = prev_sample + variance + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddpm.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddpm.py new file mode 100644 index 000000000..34551b2ad --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_ddpm.py @@ -0,0 +1,191 @@ +# Copyright 2022 UC Berkely Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
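The `add_noise()` method that closes scheduling_ddim.py above implements the forward-diffusion identity x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * eps. A hedged sketch of how it would be used to build training pairs follows; the shapes, timestep values, and import path are illustrative assumptions, not part of this patch.

```python
import paddle

from diffusers.schedulers import DDIMScheduler  # assumed import path

scheduler = DDIMScheduler(num_train_timesteps=1000)

x0 = paddle.randn([2, 4, 64, 64])          # clean latents (placeholder shape)
noise = paddle.randn(x0.shape)             # eps ~ N(0, I)
timesteps = paddle.to_tensor([10, 900])    # one diffusion step per example

# x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * eps
noisy = scheduler.add_noise(x0, noise, timesteps)
```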
+# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. + """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class DDPMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + variance_type="fixed_small", + clip_sample=True, + tensor_format="pd", + ): + + if trained_betas is not None: + self.betas = np.asarray(trained_betas) + elif beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. 
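+            # "scaled_linear" spaces the betas linearly in sqrt(beta) space and then
+            # squares, so they still run from beta_start to beta_end but grow more
+            # slowly at the start of the schedule than the plain "linear" branch above.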
+ self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + self.one = np.array(1.0) + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + self.variance_type = variance_type + + def set_timesteps(self, num_inference_steps): + num_inference_steps = min(self.config.num_train_timesteps, num_inference_steps) + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.config.num_train_timesteps, + self.config.num_train_timesteps // self.num_inference_steps)[::-1].copy() + self.set_format(tensor_format=self.tensor_format) + + def _get_variance(self, t, predicted_variance=None, variance_type=None): + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + + # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf) + # and sample from it to get previous sample + # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample + variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * self.betas[t] + + if variance_type is None: + variance_type = self.config.variance_type + + # hacks - were probs added for training stability + if variance_type == "fixed_small": + variance = self.clip(variance, min_value=1e-20) + # for rl-diffuser https://arxiv.org/abs/2205.09991 + elif variance_type == "fixed_small_log": + variance = self.log(self.clip(variance, min_value=1e-20)) + elif variance_type == "fixed_large": + variance = self.betas[t] + elif variance_type == "fixed_large_log": + # Glide max_log + variance = self.log(self.betas[t]) + elif variance_type == "learned": + return predicted_variance + elif variance_type == "learned_range": + min_log = variance + max_log = self.betas[t] + frac = (predicted_variance + 1) / 2 + variance = frac * max_log + (1 - frac) * min_log + + return variance + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + predict_epsilon=True, + generator=None, + ): + t = timestep + + if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: + model_output, predicted_variance = paddle.split(model_output, sample.shape[1], axis=1) + else: + predicted_variance = None + + # 1. compute alphas, betas + alpha_prod_t = self.alphas_cumprod[t] + alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # 2. compute predicted original sample from predicted noise also called + # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf + if predict_epsilon: + pred_original_sample = (sample - beta_prod_t**(0.5) * model_output) / alpha_prod_t**(0.5) + else: + pred_original_sample = model_output + + # 3. Clip "predicted x_0" + if self.config.clip_sample: + pred_original_sample = self.clip(pred_original_sample, -1, 1) + + # 4. 
Compute coefficients for pred_original_sample x_0 and current sample x_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_original_sample_coeff = (alpha_prod_t_prev**(0.5) * self.betas[t]) / beta_prod_t + current_sample_coeff = self.alphas[t]**(0.5) * beta_prod_t_prev / beta_prod_t + + # 5. Compute predicted previous sample µ_t + # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf + pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample + + # 6. Add noise + variance = 0 + if t > 0: + noise = self.randn_like(model_output) + variance = (self._get_variance(t, predicted_variance=predicted_variance)**0.5) * noise + + pred_prev_sample = pred_prev_sample + variance + + return {"prev_sample": pred_prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_karras_ve.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_karras_ve.py new file mode 100644 index 000000000..36827564e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_karras_ve.py @@ -0,0 +1,124 @@ +# Copyright 2022 NVIDIA and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class KarrasVeScheduler(SchedulerMixin, ConfigMixin): + """ + Stochastic sampling from Karras et al. [1] tailored to the Variance-Expanding (VE) models [2]. Use Algorithm 2 and + the VE column of Table 1 from [1] for reference. + + [1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models." + https://arxiv.org/abs/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic + differential equations." https://arxiv.org/abs/2011.13456 + """ + + @register_to_config + def __init__( + self, + sigma_min=0.02, + sigma_max=100, + s_noise=1.007, + s_churn=80, + s_min=0.05, + s_max=50, + tensor_format="pd", + ): + """ + For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of + Diffusion-Based Generative Models." https://arxiv.org/abs/2206.00364. The grid search values used to find the + optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper. 
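+
+        In this implementation the four parameters act through add_noise_to_input():
+        whenever s_min <= sigma_i <= s_max, a factor gamma_i = min(s_churn / num_inference_steps, sqrt(2) - 1)
+        of extra noise (scaled by s_noise) is mixed in to reach sigma_hat = sigma_i + gamma_i * sigma_i;
+        outside that range gamma_i is 0 and no extra noise is added.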
+ + Args: + sigma_min (`float`): minimum noise magnitude + sigma_max (`float`): maximum noise magnitude + s_noise (`float`): the amount of additional noise to counteract loss of detail during sampling. + A reasonable range is [1.000, 1.011]. + s_churn (`float`): the parameter controlling the overall amount of stochasticity. + A reasonable range is [0, 100]. + s_min (`float`): the start value of the sigma range where we add noise (enable stochasticity). + A reasonable range is [0, 10]. + s_max (`float`): the end value of the sigma range where we add noise. + A reasonable range is [0.2, 80]. + """ + # setable values + self.num_inference_steps = None + self.timesteps = None + self.schedule = None # sigma(t_i) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.arange(0, self.num_inference_steps)[::-1].copy() + self.schedule = [(self.sigma_max * (self.sigma_min**2 / self.sigma_max**2)**(i / (num_inference_steps - 1))) + for i in self.timesteps] + self.schedule = np.array(self.schedule, dtype=np.float32) + + self.set_format(tensor_format=self.tensor_format) + + def add_noise_to_input(self, sample, sigma, generator=None): + """ + Explicit Langevin-like "churn" step of adding noise to the sample according to a factor gamma_i ≥ 0 to reach a + higher noise level sigma_hat = sigma_i + gamma_i*sigma_i. + """ + if self.s_min <= sigma <= self.s_max: + gamma = min(self.s_churn / self.num_inference_steps, 2**0.5 - 1) + else: + gamma = 0 + + # sample eps ~ N(0, S_noise^2 * I) + eps = self.s_noise * paddle.randn(sample.shape) + sigma_hat = sigma + gamma * sigma + sample_hat = sample + ((sigma_hat**2 - sigma**2)**0.5 * eps) + + return sample_hat, sigma_hat + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_hat + sigma_hat * model_output + derivative = (sample_hat - pred_original_sample) / sigma_hat + sample_prev = sample_hat + (sigma_prev - sigma_hat) * derivative + + return {"prev_sample": sample_prev, "derivative": derivative} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sigma_hat: float, + sigma_prev: float, + sample_hat: Union[paddle.Tensor, np.ndarray], + sample_prev: Union[paddle.Tensor, np.ndarray], + derivative: Union[paddle.Tensor, np.ndarray], + ): + pred_original_sample = sample_prev + sigma_prev * model_output + derivative_corr = (sample_prev - pred_original_sample) / sigma_prev + sample_prev = sample_hat + (sigma_prev - sigma_hat) * (0.5 * derivative + 0.5 * derivative_corr) + return {"prev_sample": sample_prev, "derivative": derivative_corr} + + def add_noise(self, original_samples, noise, timesteps): + raise NotImplementedError() diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_lms_discrete.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_lms_discrete.py new file mode 100644 index 000000000..2ed63cc2c --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_lms_discrete.py @@ -0,0 +1,133 @@ +# Copyright 2022 Katherine Crowson and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
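The KarrasVeScheduler above exposes the three pieces of Algorithm 2 from Karras et al. (noise "churn", an Euler step, and an optional second-order correction). Below is a hypothetical sketch of how they compose; `model` is a stand-in for a trained denoiser, and the import path and shapes are assumptions, not part of this patch.

```python
import paddle

from diffusers.schedulers import KarrasVeScheduler  # assumed import path


def model(sample, sigma):
    # placeholder denoiser; a real score model would be called here
    return paddle.zeros_like(sample)


scheduler = KarrasVeScheduler()
scheduler.set_timesteps(50)

sample = paddle.randn([1, 3, 256, 256]) * scheduler.sigma_max
for t in scheduler.timesteps:
    t = int(t)
    sigma = scheduler.schedule[t]
    sigma_prev = scheduler.schedule[t - 1] if t > 0 else 0.0

    # 1. "churn": add noise to move from sigma up to sigma_hat
    sample_hat, sigma_hat = scheduler.add_noise_to_input(sample, sigma)

    # 2. Euler step from sigma_hat down to sigma_prev
    output = scheduler.step(model(sample_hat, sigma_hat), sigma_hat, sigma_prev, sample_hat)

    # 3. second-order correction, skipped on the final step (sigma_prev == 0)
    if sigma_prev != 0:
        output = scheduler.step_correct(
            model(output["prev_sample"], sigma_prev),
            sigma_hat, sigma_prev, sample_hat,
            output["prev_sample"], output["derivative"],
        )
    sample = output["prev_sample"]
```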
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle +from scipy import integrate + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class LMSDiscreteScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + trained_betas=None, + timestep_values=None, + tensor_format="pd", + ): + """ + Linear Multistep Scheduler for discrete beta schedules. Based on the original k-diffusion implementation by + Katherine Crowson: + https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L181 + """ + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5 + + # setable values + self.num_inference_steps = None + self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self.derivatives = [] + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def get_lms_coefficient(self, order, t, current_order): + """ + Compute a linear multistep coefficient + """ + + def lms_derivative(tau): + prod = 1.0 + for k in range(order): + if current_order == k: + continue + prod *= (tau - self.sigmas[t - k]) / (self.sigmas[t - current_order] - self.sigmas[t - k]) + return prod + + integrated_coeff = integrate.quad(lms_derivative, self.sigmas[t], self.sigmas[t + 1], epsrel=1e-4)[0] + + return integrated_coeff + + def set_timesteps(self, num_inference_steps): + self.num_inference_steps = num_inference_steps + self.timesteps = np.linspace(self.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) + + low_idx = np.floor(self.timesteps).astype(int) + high_idx = np.ceil(self.timesteps).astype(int) + frac = np.mod(self.timesteps, 1.0) + sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod)**0.5) + sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] + self.sigmas = np.concatenate([sigmas, [0.0]]) + + self.derivatives = [] + + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + order: int = 4, + ): + sigma = self.sigmas[timestep] + + # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise + pred_original_sample = sample - sigma * model_output + + # 2. 
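The integral in `get_lms_coefficient` is the Lagrange basis polynomial of one stored derivative, integrated over a single sigma interval. A small self-contained check, with an assumed toy sigma trajectory, makes the behaviour concrete:

```python
import numpy as np
from scipy import integrate

# Assumed toy sigma trajectory (descending, ending at 0 as after set_timesteps).
sigmas = np.array([14.6, 9.7, 6.1, 3.6, 1.9, 0.0])

def lms_coefficient(order, t, current_order):
    """Integral of the Lagrange basis polynomial for `current_order` over [sigma_t, sigma_{t+1}]."""
    def lms_derivative(tau):
        prod = 1.0
        for k in range(order):
            if current_order == k:
                continue
            prod *= (tau - sigmas[t - k]) / (sigmas[t - current_order] - sigmas[t - k])
        return prod
    return integrate.quad(lms_derivative, sigmas[t], sigmas[t + 1], epsrel=1e-4)[0]

# With order=1 the method degenerates to a plain Euler step of size sigma_{t+1} - sigma_t:
assert np.isclose(lms_coefficient(1, 2, 0), sigmas[3] - sigmas[2])
# With order=2 the two coefficients still sum to the same step size:
coeffs = [lms_coefficient(2, 2, c) for c in range(2)]
assert np.isclose(sum(coeffs), sigmas[3] - sigmas[2])
```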
Convert to an ODE derivative + derivative = (sample - pred_original_sample) / sigma + self.derivatives.append(derivative) + if len(self.derivatives) > order: + self.derivatives.pop(0) + + # 3. Compute linear multistep coefficients + order = min(timestep + 1, order) + lms_coeffs = [self.get_lms_coefficient(order, timestep, curr_order) for curr_order in range(order)] + + # 4. Compute previous sample based on the derivatives path + prev_sample = sample + sum(coeff * derivative + for coeff, derivative in zip(lms_coeffs, reversed(self.derivatives))) + + return {"prev_sample": prev_sample} + + def add_noise(self, original_samples, noise, timesteps): + alpha_prod = self.alphas_cumprod[timesteps] + alpha_prod = self.match_shape(alpha_prod, original_samples) + + noisy_samples = (alpha_prod**0.5) * original_samples + ((1 - alpha_prod)**0.5) * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_pndm.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_pndm.py new file mode 100644 index 000000000..12abd9cfe --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_pndm.py @@ -0,0 +1,258 @@ +# Copyright 2022 Zhejiang University Team and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim +import math +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +def betas_for_alpha_bar(num_diffusion_timesteps, max_beta=0.999): + """ + Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of + (1-beta) over time from t = [0,1]. + + :param num_diffusion_timesteps: the number of betas to produce. :param alpha_bar: a lambda that takes an argument t + from 0 to 1 and + produces the cumulative product of (1-beta) up to that part of the diffusion process. + :param max_beta: the maximum beta to use; use values lower than 1 to + prevent singularities. 
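Putting the scheduler above to work, a denoising loop looks like the sketch below (it mirrors how `module.py` later in this patch drives it); `unet` and `text_embeddings` are placeholders for the conditioned U-Net and prompt embeddings, not definitions from this file:

```python
import paddle

def lms_sample(unet, scheduler, text_embeddings, shape, num_inference_steps=50):
    scheduler.set_timesteps(num_inference_steps)
    latents = paddle.randn(shape) * scheduler.sigmas[0]        # scale initial noise by the largest sigma
    for i, t in enumerate(scheduler.timesteps):
        sigma = scheduler.sigmas[i]
        latent_input = latents / ((sigma**2 + 1) ** 0.5)       # input scaling expected by this parameterization
        noise_pred = unet(latent_input, t, encoder_hidden_states=text_embeddings)["sample"]
        latents = scheduler.step(noise_pred, i, latents)["prev_sample"]   # note: step is indexed by i, not t
    return latents
```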
+ """ + + def alpha_bar(time_step): + return math.cos((time_step + 0.008) / 1.008 * math.pi / 2)**2 + + betas = [] + for i in range(num_diffusion_timesteps): + t1 = i / num_diffusion_timesteps + t2 = (i + 1) / num_diffusion_timesteps + betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) + return np.array(betas, dtype=np.float32) + + +class PNDMScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__( + self, + num_train_timesteps=1000, + beta_start=0.0001, + beta_end=0.02, + beta_schedule="linear", + tensor_format="pd", + skip_prk_steps=False, + ): + + if beta_schedule == "linear": + self.betas = np.linspace(beta_start, beta_end, num_train_timesteps, dtype=np.float32) + elif beta_schedule == "scaled_linear": + # this schedule is very specific to the latent diffusion model. + self.betas = np.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=np.float32)**2 + elif beta_schedule == "squaredcos_cap_v2": + # Glide cosine schedule + self.betas = betas_for_alpha_bar(num_train_timesteps) + else: + raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") + + self.alphas = 1.0 - self.betas + self.alphas_cumprod = np.cumprod(self.alphas, axis=0) + + self.one = np.array(1.0) + + # For now we only support F-PNDM, i.e. the runge-kutta method + # For more information on the algorithm please take a look at the paper: https://arxiv.org/pdf/2202.09778.pdf + # mainly at formula (9), (12), (13) and the Algorithm 2. + self.pndm_order = 4 + + # running values + self.cur_model_output = 0 + self.counter = 0 + self.cur_sample = None + self.ets = [] + + # setable values + self.num_inference_steps = None + self._timesteps = np.arange(0, num_train_timesteps)[::-1].copy() + self._offset = 0 + self.prk_timesteps = None + self.plms_timesteps = None + self.timesteps = None + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, offset=0): + self.num_inference_steps = num_inference_steps + self._timesteps = list( + range(0, self.config.num_train_timesteps, self.config.num_train_timesteps // num_inference_steps)) + self._offset = offset + self._timesteps = [t + self._offset for t in self._timesteps] + + if self.config.skip_prk_steps: + # for some models like stable diffusion the prk steps can/should be skipped to + # produce better results. 
When using PNDM with `self.config.skip_prk_steps` the implementation + # is based on crowsonkb's PLMS sampler implementation: https://github.com/CompVis/latent-diffusion/pull/51 + self.prk_timesteps = [] + self.plms_timesteps = list(reversed(self._timesteps[:-1] + self._timesteps[-2:-1] + self._timesteps[-1:])) + else: + prk_timesteps = np.array(self._timesteps[-self.pndm_order:]).repeat(2) + np.tile( + np.array([0, self.config.num_train_timesteps // num_inference_steps // 2]), self.pndm_order) + self.prk_timesteps = list(reversed(prk_timesteps[:-1].repeat(2)[1:-1])) + self.plms_timesteps = list(reversed(self._timesteps[:-3])) + + self.timesteps = self.prk_timesteps + self.plms_timesteps + + self.ets = [] + self.counter = 0 + self.set_format(tensor_format=self.tensor_format) + + def step( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + if self.counter < len(self.prk_timesteps) and not self.config.skip_prk_steps: + return self.step_prk(model_output=model_output, timestep=timestep, sample=sample) + else: + return self.step_plms(model_output=model_output, timestep=timestep, sample=sample) + + def step_prk( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the Runge-Kutta method. RK takes 4 forward passes to approximate the + solution to the differential equation. + """ + diff_to_prev = 0 if self.counter % 2 else self.config.num_train_timesteps // self.num_inference_steps // 2 + prev_timestep = max(timestep - diff_to_prev, self.prk_timesteps[-1]) + timestep = self.prk_timesteps[self.counter // 4 * 4] + + if self.counter % 4 == 0: + self.cur_model_output += 1 / 6 * model_output + self.ets.append(model_output) + self.cur_sample = sample + elif (self.counter - 1) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 2) % 4 == 0: + self.cur_model_output += 1 / 3 * model_output + elif (self.counter - 3) % 4 == 0: + model_output = self.cur_model_output + 1 / 6 * model_output + self.cur_model_output = 0 + + # cur_sample should not be `None` + cur_sample = self.cur_sample if self.cur_sample is not None else sample + + prev_sample = self._get_prev_sample(cur_sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def step_plms( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + ): + """ + Step function propagating the sample with the linear multi-step method. This has one forward pass with multiple + times to approximate the solution. 
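The 1/6, 1/3, 1/3, 1/6 accumulation in `step_prk` above is the classical fourth-order Runge–Kutta weighting; a scalar sketch of the same rule for dx/dt = f(x, t), unrelated to any diffusion-specific code:

```python
import numpy as np

def rk4_step(f, x, t, h):
    k1 = f(x, t)
    k2 = f(x + h / 2 * k1, t + h / 2)
    k3 = f(x + h / 2 * k2, t + h / 2)
    k4 = f(x + h * k3, t + h)
    # same 1/6, 1/3, 1/3, 1/6 blend as the cur_model_output accumulation above
    return x + h * (k1 / 6 + k2 / 3 + k3 / 3 + k4 / 6)

# Fourth-order accuracy check on dx/dt = -x (exact solution: exp(-t)).
x, t, h = 1.0, 0.0, 0.1
for _ in range(10):
    x = rk4_step(lambda x, t: -x, x, t, h)
    t += h
assert abs(x - np.exp(-1.0)) < 1e-6
```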
+ """ + if not self.config.skip_prk_steps and len(self.ets) < 3: + raise ValueError( + f"{self.__class__} can only be run AFTER scheduler has been run " + "in 'prk' mode for at least 12 iterations " + "See: https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py " + "for more information.") + + prev_timestep = max(timestep - self.config.num_train_timesteps // self.num_inference_steps, 0) + + if self.counter != 1: + self.ets.append(model_output) + else: + prev_timestep = timestep + timestep = timestep + self.config.num_train_timesteps // self.num_inference_steps + + if len(self.ets) == 1 and self.counter == 0: + model_output = model_output + self.cur_sample = sample + elif len(self.ets) == 1 and self.counter == 1: + model_output = (model_output + self.ets[-1]) / 2 + sample = self.cur_sample + self.cur_sample = None + elif len(self.ets) == 2: + model_output = (3 * self.ets[-1] - self.ets[-2]) / 2 + elif len(self.ets) == 3: + model_output = (23 * self.ets[-1] - 16 * self.ets[-2] + 5 * self.ets[-3]) / 12 + else: + model_output = (1 / 24) * (55 * self.ets[-1] - 59 * self.ets[-2] + 37 * self.ets[-3] - 9 * self.ets[-4]) + + prev_sample = self._get_prev_sample(sample, timestep, prev_timestep, model_output) + self.counter += 1 + + return {"prev_sample": prev_sample} + + def _get_prev_sample(self, sample, timestep, timestep_prev, model_output): + # See formula (9) of PNDM paper https://arxiv.org/pdf/2202.09778.pdf + # this function computes x_(t−δ) using the formula of (9) + # Note that x_t needs to be added to both sides of the equation + + # Notation ( -> + # alpha_prod_t -> α_t + # alpha_prod_t_prev -> α_(t−δ) + # beta_prod_t -> (1 - α_t) + # beta_prod_t_prev -> (1 - α_(t−δ)) + # sample -> x_t + # model_output -> e_θ(x_t, t) + # prev_sample -> x_(t−δ) + alpha_prod_t = self.alphas_cumprod[timestep + 1 - self._offset] + alpha_prod_t_prev = self.alphas_cumprod[timestep_prev + 1 - self._offset] + beta_prod_t = 1 - alpha_prod_t + beta_prod_t_prev = 1 - alpha_prod_t_prev + + # corresponds to (α_(t−δ) - α_t) divided by + # denominator of x_t in formula (9) and plus 1 + # Note: (α_(t−δ) - α_t) / (sqrt(α_t) * (sqrt(α_(t−δ)) + sqr(α_t))) = + # sqrt(α_(t−δ)) / sqrt(α_t)) + sample_coeff = (alpha_prod_t_prev / alpha_prod_t)**(0.5) + + # corresponds to denominator of e_θ(x_t, t) in formula (9) + model_output_denom_coeff = alpha_prod_t * beta_prod_t_prev**(0.5) + (alpha_prod_t * beta_prod_t * + alpha_prod_t_prev)**(0.5) + + # full formula (9) + prev_sample = (sample_coeff * sample - + (alpha_prod_t_prev - alpha_prod_t) * model_output / model_output_denom_coeff) + + return prev_sample + + def add_noise(self, original_samples, noise, timesteps): + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 + sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) + sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 + sqrt_one_minus_alpha_prod = self.match_shape(sqrt_one_minus_alpha_prod, original_samples) + + noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + return noisy_samples + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_ve.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_ve.py new file mode 100644 index 000000000..92ca23d5b --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_ve.py @@ -0,0 +1,172 
@@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pypaddle +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +from typing import Union + +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVeScheduler(SchedulerMixin, ConfigMixin): + """ + The variance exploding stochastic differential equation (SDE) scheduler. + + :param snr: coefficient weighting the step from the model_output sample (from the network) to the random noise. + :param sigma_min: initial noise scale for sigma sequence in sampling procedure. The minimum sigma should mirror the + distribution of the data. + :param sigma_max: :param sampling_eps: the end value of sampling, where timesteps decrease progessively from 1 to + epsilon. :param correct_steps: number of correction steps performed on a produced sample. :param tensor_format: + "np" or "pd" for the expected format of samples passed to the Scheduler. + """ + + @register_to_config + def __init__( + self, + num_train_timesteps=2000, + snr=0.15, + sigma_min=0.01, + sigma_max=1348, + sampling_eps=1e-5, + correct_steps=1, + tensor_format="pd", + ): + # self.sigmas = None + # self.discrete_sigmas = None + # + # # setable values + # self.num_inference_steps = None + self.timesteps = None + + self.set_sigmas(num_train_timesteps, sigma_min, sigma_max, sampling_eps) + + self.tensor_format = tensor_format + self.set_format(tensor_format=tensor_format) + + def set_timesteps(self, num_inference_steps, sampling_eps=None): + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.timesteps = np.linspace(1, sampling_eps, num_inference_steps) + elif tensor_format == "pd": + self.timesteps = paddle.linspace(1, sampling_eps, num_inference_steps) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_sigmas(self, num_inference_steps, sigma_min=None, sigma_max=None, sampling_eps=None): + sigma_min = sigma_min if sigma_min is not None else self.config.sigma_min + sigma_max = sigma_max if sigma_max is not None else self.config.sigma_max + sampling_eps = sampling_eps if sampling_eps is not None else self.config.sampling_eps + if self.timesteps is None: + self.set_timesteps(num_inference_steps, sampling_eps) + + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + self.discrete_sigmas = np.exp(np.linspace(np.log(sigma_min), np.log(sigma_max), num_inference_steps)) + self.sigmas = np.array([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + elif tensor_format == "pd": + self.discrete_sigmas = paddle.exp(paddle.linspace(np.log(sigma_min), 
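`set_sigmas` below builds a geometric noise schedule between `sigma_min` and `sigma_max`; a plain NumPy sketch using the default values shows the endpoints it produces:

```python
import numpy as np

# Values match the scheduler defaults above; this is only an illustration of the schedule shape.
sigma_min, sigma_max, sampling_eps, N = 0.01, 1348, 1e-5, 2000

timesteps = np.linspace(1, sampling_eps, N)                        # t runs from 1 down to ~0
discrete_sigmas = np.exp(np.linspace(np.log(sigma_min), np.log(sigma_max), N))
sigmas = sigma_min * (sigma_max / sigma_min) ** timesteps           # sigma(t), the continuous-time view

assert np.isclose(sigmas[0], sigma_max)                 # sampling starts at the largest noise level
assert np.isclose(sigmas[-1], sigma_min, rtol=1e-3)     # and ends near the smallest one
```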
np.log(sigma_max), + num_inference_steps)) + self.sigmas = paddle.to_tensor([sigma_min * (sigma_max / sigma_min)**t for t in self.timesteps]) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def get_adjacent_sigma(self, timesteps, t): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.where(timesteps == 0, np.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + elif tensor_format == "pd": + return paddle.where(timesteps == 0, paddle.zeros_like(t), self.discrete_sigmas[timesteps - 1]) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def set_seed(self, seed): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + np.random.seed(seed) + elif tensor_format == "pd": + paddle.seed(seed) + else: + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def step_pred( + self, + model_output: Union[paddle.Tensor, np.ndarray], + timestep: int, + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Predict the sample at the previous timestep by reversing the SDE. + """ + if seed is not None: + self.set_seed(seed) + # TODO(Patrick) non-Pypaddle + + timestep = timestep * paddle.ones(sample.shape[0]) # paddle.repeat_interleave(timestep, sample.shape[0]) + timesteps = (timestep * (len(self.timesteps) - 1)).astype("int64") + + sigma = self.discrete_sigmas[timesteps] + adjacent_sigma = self.get_adjacent_sigma(timesteps, timestep) + drift = self.zeros_like(sample) + diffusion = (sigma**2 - adjacent_sigma**2)**0.5 + + # equation 6 in the paper: the model_output modeled by the network is grad_x log pt(x) + # also equation 47 shows the analog from SDE models to ancestral sampling methods + drift = drift - diffusion[:, None, None, None]**2 * model_output + + # equation 6: sample noise for the diffusion term of + noise = self.randn_like(sample) + prev_sample_mean = sample - drift # subtract because `dt` is a small negative timestep + # TODO is the variable diffusion the correct scaling term for the noise? + prev_sample = prev_sample_mean + diffusion[:, None, None, None] * noise # add impact of diffusion field g + + return {"prev_sample": prev_sample, "prev_sample_mean": prev_sample_mean} + + def step_correct( + self, + model_output: Union[paddle.Tensor, np.ndarray], + sample: Union[paddle.Tensor, np.ndarray], + seed=None, + ): + """ + Correct the predicted sample based on the output model_output of the network. This is often run repeatedly + after making the prediction for the previous timestep. + """ + if seed is not None: + self.set_seed(seed) + + # For small batch sizes, the paper "suggest replacing norm(z) with sqrt(d), where d is the dim. 
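For context, a pipeline normally alternates the corrector and predictor defined here (predictor–corrector sampling). The sketch below assumes a score network called as `model(sample, sigma)["sample"]`, which is not part of this file:

```python
import paddle

def sde_ve_sample(model, scheduler, shape, num_inference_steps=2000):
    # scheduler: a ScoreSdeVeScheduler; model: assumed score network model(sample, sigma)["sample"]
    scheduler.set_timesteps(num_inference_steps)
    scheduler.set_sigmas(num_inference_steps)
    sample = paddle.randn(shape) * scheduler.config.sigma_max
    for i, t in enumerate(scheduler.timesteps):
        sigma_t = scheduler.sigmas[i] * paddle.ones([shape[0]])
        # corrector: a few Langevin MCMC steps at the current noise level
        for _ in range(scheduler.config.correct_steps):
            score = model(sample, sigma_t)["sample"]
            sample = scheduler.step_correct(score, sample)["prev_sample"]
        # predictor: one reverse-SDE (Euler–Maruyama) step to the next noise level
        score = model(sample, sigma_t)["sample"]
        out = scheduler.step_pred(score, t, sample)
        sample, sample_mean = out["prev_sample"], out["prev_sample_mean"]
    return sample_mean   # conventionally the final mean is returned, without the last injected noise
```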
of z" + # sample noise for correction + noise = self.randn_like(sample) + + # compute step size from the model_output, the noise, and the snr + grad_norm = self.norm(model_output) + noise_norm = self.norm(noise) + step_size = (self.config.snr * noise_norm / grad_norm)**2 * 2 + step_size = step_size * paddle.ones(sample.shape[0]) + # self.repeat_scalar(step_size, sample.shape[0]) + + # compute corrected sample: model_output term and noise term + prev_sample_mean = sample + step_size[:, None, None, None] * model_output + prev_sample = prev_sample_mean + ((step_size * 2)**0.5)[:, None, None, None] * noise + + return {"prev_sample": prev_sample} + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_vp.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_vp.py new file mode 100644 index 000000000..8ad84c73e --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_sde_vp.py @@ -0,0 +1,59 @@ +# Copyright 2022 Google Brain and The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DISCLAIMER: This file is strongly influenced by https://github.com/yang-song/score_sde_pytorch +# TODO(Patrick, Anton, Suraj) - make scheduler framework indepedent and clean-up a bit +import numpy as np +import paddle + +from ..configuration_utils import ConfigMixin +from ..configuration_utils import register_to_config +from .scheduling_utils import SchedulerMixin + + +class ScoreSdeVpScheduler(SchedulerMixin, ConfigMixin): + + @register_to_config + def __init__(self, num_train_timesteps=2000, beta_min=0.1, beta_max=20, sampling_eps=1e-3, tensor_format="np"): + + self.sigmas = None + self.discrete_sigmas = None + self.timesteps = None + + def set_timesteps(self, num_inference_steps): + self.timesteps = paddle.linspace(1, self.config.sampling_eps, num_inference_steps) + + def step_pred(self, score, x, t): + # TODO(Patrick) better comments + non-PyTorch + # postprocess model score + log_mean_coeff = (-0.25 * t**2 * (self.config.beta_max - self.config.beta_min) - 0.5 * t * self.config.beta_min) + std = paddle.sqrt(1.0 - paddle.exp(2.0 * log_mean_coeff)) + score = -score / std[:, None, None, None] + + # compute + dt = -1.0 / len(self.timesteps) + + beta_t = self.config.beta_min + t * (self.config.beta_max - self.config.beta_min) + drift = -0.5 * beta_t[:, None, None, None] * x + diffusion = paddle.sqrt(beta_t) + drift = drift - diffusion[:, None, None, None]**2 * score + x_mean = x + drift * dt + + # add noise + noise = self.randn_like(x) + x = x_mean + diffusion[:, None, None, None] * np.sqrt(-dt) * noise + + return x, x_mean + + def __len__(self): + return self.config.num_train_timesteps diff --git a/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_utils.py b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_utils.py new file mode 
100644 index 000000000..dc3cbde5a --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/diffusers/schedulers/scheduling_utils.py @@ -0,0 +1,102 @@ +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Union + +import numpy as np +import paddle + +SCHEDULER_CONFIG_NAME = "scheduler_config.json" + + +class SchedulerMixin: + + config_name = SCHEDULER_CONFIG_NAME + ignore_for_config = ["tensor_format"] + + def set_format(self, tensor_format="pd"): + self.tensor_format = tensor_format + if tensor_format == "pd": + for key, value in vars(self).items(): + if isinstance(value, np.ndarray): + setattr(self, key, paddle.to_tensor(value)) + + return self + + def clip(self, tensor, min_value=None, max_value=None): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.clip(tensor, min_value, max_value) + elif tensor_format == "pd": + return paddle.clip(tensor, min_value, max_value) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def log(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + + if tensor_format == "np": + return np.log(tensor) + elif tensor_format == "pd": + return paddle.log(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def match_shape(self, values: Union[np.ndarray, paddle.Tensor], broadcast_array: Union[np.ndarray, paddle.Tensor]): + """ + Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. + + Args: + values: an array or tensor of values to extract. + broadcast_array: an array with a larger shape of K dimensions with the batch + dimension equal to the length of timesteps. + Returns: + a tensor of shape [batch_size, 1, ...] where the shape has K dims. 
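`match_shape` below simply appends trailing singleton dimensions so per-timestep scalars broadcast against image-shaped tensors; the same idea in plain NumPy:

```python
import numpy as np

values = np.array([0.1, 0.5, 0.9], dtype=np.float32)          # one coefficient per sample in the batch
broadcast_array = np.zeros((3, 4, 64, 64), dtype=np.float32)  # [batch, C, H, W] target

while values.ndim < broadcast_array.ndim:
    values = values[..., None]

print(values.shape)             # (3, 1, 1, 1)
out = values * broadcast_array  # broadcasts over C, H, W without an explicit tile
```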
+ """ + + tensor_format = getattr(self, "tensor_format", "pd") + values = values.flatten() + + while len(values.shape) < len(broadcast_array.shape): + values = values[..., None] + + return values + + def norm(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.linalg.norm(tensor) + elif tensor_format == "pd": + return paddle.norm(tensor.reshape([tensor.shape[0], -1]), axis=-1).mean() + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def randn_like(self, tensor, generator=None): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.random.randn(np.shape(tensor)) + elif tensor_format == "pd": + # return paddle.randn_like(tensor) + return paddle.randn(tensor.shape) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") + + def zeros_like(self, tensor): + tensor_format = getattr(self, "tensor_format", "pd") + if tensor_format == "np": + return np.zeros_like(tensor) + elif tensor_format == "pd": + return paddle.zeros_like(tensor) + + raise ValueError(f"`self.tensor_format`: {self.tensor_format} is not valid.") diff --git a/modules/image/text_to_image/stable_diffusion_waifu/module.py b/modules/image/text_to_image/stable_diffusion_waifu/module.py new file mode 100755 index 000000000..c31082a96 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/module.py @@ -0,0 +1,367 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import ast +import os +import random +import sys +from functools import partial +from typing import List +from typing import Optional + +import numpy as np +import paddle +from docarray import Document +from docarray import DocumentArray +from IPython import display +from PIL import Image +from stable_diffusion.clip.clip.utils import build_model +from stable_diffusion.clip.clip.utils import tokenize +from stable_diffusion.diffusers import AutoencoderKL +from stable_diffusion.diffusers import DDIMScheduler +from stable_diffusion.diffusers import LMSDiscreteScheduler +from stable_diffusion.diffusers import PNDMScheduler +from stable_diffusion.diffusers import UNet2DConditionModel +from tqdm.auto import tqdm + +import paddlehub as hub +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +@moduleinfo(name="stable_diffusion_waifu", + version="1.0.0", + type="image/text_to_image", + summary="", + author="paddlepaddle", + author_email="paddle-dev@baidu.com") +class StableDiffusion: + + def __init__(self): + self.vae = AutoencoderKL(in_channels=3, + out_channels=3, + down_block_types=("DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", + "DownEncoderBlock2D"), + up_block_types=("UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", + "UpDecoderBlock2D"), + block_out_channels=(128, 256, 512, 512), + layers_per_block=2, + act_fn="silu", + latent_channels=4, + sample_size=512) + + self.unet = UNet2DConditionModel(sample_size=64, + in_channels=4, + out_channels=4, + center_input_sample=False, + flip_sin_to_cos=True, + freq_shift=0, + down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", + "CrossAttnDownBlock2D", "DownBlock2D"), + up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", + "CrossAttnUpBlock2D"), + block_out_channels=(320, 640, 1280, 1280), + layers_per_block=2, + downsample_padding=1, + mid_block_scale_factor=1, + act_fn="silu", + norm_num_groups=32, + norm_eps=1e-5, + cross_attention_dim=768, + attention_head_dim=8) + + vae_path = os.path.join(self.directory, 'pre_trained', 'waifu-vae.pdparams') + unet_path = os.path.join(self.directory, 'pre_trained', 'waifu-unet.pdparams') + self.unet.set_dict(paddle.load(unet_path)) + self.vae.set_dict(paddle.load(vae_path)) + for parameter in self.unet.parameters(): + parameter.stop_gradient = True + self.vae.eval() + for parameter in self.vae.parameters(): + parameter.stop_gradient = True + self.unet.eval() + + self.text_encoder = build_model() + for parameter in self.text_encoder.parameters(): + parameter.stop_gradient = True + self.scheduler = PNDMScheduler(beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + num_train_timesteps=1000, + skip_prk_steps=True) + + def generate_image(self, + text_prompts, + style: Optional[str] = None, + artist: Optional[str] = None, + width_height: Optional[List[int]] = [512, 512], + batch_size: Optional[int] = 1, + num_inference_steps=50, + guidance_scale=7.5, + enable_fp16=False, + seed=None, + display_rate=5, + use_gpu=True, + output_dir: Optional[str] = 'stable_diffusion_waifu_out'): + """ + Create Disco Diffusion artworks and save the result into a DocumentArray. + + :param text_prompts: Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. 
These can include commas and weights to adjust the relative importance of each element. E.g. "A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply. + :param style: Image style, such as oil paintings, if specified, style will be used to construct prompts. + :param artist: Artist style, if specified, style will be used to construct prompts. + :param width_height: Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so. + :param batch_size: This variable sets the number of still images you want SD to create for each prompt. + :param num_inference_steps: The number of inference steps. + :param guidance_scale: Increase the adherence to the conditional signal which in this case is text as well as overall sample quality. + :param enable_fp16: Whether to use float16. + :param use_gpu: whether to use gpu or not. + :param output_dir: Output directory. + :return: a DocumentArray object that has `n_batches` Documents + """ + if seed: + np.random.seed(seed) + random.seed(seed) + paddle.seed(seed) + + if use_gpu: + try: + _places = os.environ.get("CUDA_VISIBLE_DEVICES", None) + if _places: + paddle.device.set_device("gpu:{}".format(0)) + except: + raise RuntimeError( + "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." + ) + else: + paddle.device.set_device("cpu") + paddle.disable_static() + + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + if isinstance(text_prompts, str): + text_prompts = text_prompts.rstrip(',.,。') + if style is not None: + text_prompts += ",{}".format(style) + if artist is not None: + text_prompts += ",{},trending on artstation".format(artist) + text_prompts = [text_prompts] + elif isinstance(text_prompts, list): + for i, prompt in enumerate( + text_prompts): # different from dd here, dd can have multiple prompts for one image with weight. 
+ text_prompts[i] = prompt.rstrip(',.,。') + if style is not None: + text_prompts[i] += ",{}".format(style) + if artist is not None: + text_prompts[i] += ",{},trending on artstation".format(artist) + + width, height = width_height + da_batches = DocumentArray() + + for prompt in text_prompts: + d = Document(tags={'prompt': prompt}) + da_batches.append(d) + for i in range(batch_size): + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': i})) + d.chunks.append(Document(tags={'prompt': prompt, 'image idx': 'merged'})) + with paddle.amp.auto_cast(enable=enable_fp16, level='O2'): + prompts = [prompt] * batch_size + text_input = tokenize(prompts) + text_embeddings = self.text_encoder(text_input) + uncond_input = tokenize([""] * batch_size) + uncond_embeddings = self.text_encoder(uncond_input) + text_embeddings = paddle.concat([uncond_embeddings, text_embeddings]) + + latents = paddle.randn((batch_size, self.unet.in_channels, height // 8, width // 8), ) + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = latents * self.scheduler.sigmas[0] + + self.scheduler.set_timesteps(num_inference_steps) + for i, t in tqdm(enumerate(self.scheduler.timesteps)): + # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes. + latent_model_input = paddle.concat([latents] * 2) + + if isinstance(self.scheduler, LMSDiscreteScheduler): + sigma = self.scheduler.sigmas[i] + latent_model_input = latent_model_input / ((sigma**2 + 1)**0.5) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"] + + # perform guidance + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + if isinstance(self.scheduler, LMSDiscreteScheduler): + latents = self.scheduler.step(noise_pred, i, latents)["prev_sample"] + else: + latents = self.scheduler.step(noise_pred, t, latents)["prev_sample"] + if i % display_rate == 0: + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt[:10]}-progress.png')) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(merge_image) + d.chunks[-1].chunks.append(c) + display.clear_output(wait=True) + display.display(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + c = Document(tags={'step': i, 'prompt': prompt}) + c.load_pil_image_to_datauri(image) + d.chunks[j].chunks.append(c) + + # vae decode + images = self.vae.decode(1 / 0.18215 * latents) + images = (images / 2 + 0.5).clip(0, 1) + merge_image = images.cpu().transpose([2, 0, 3, 1]).flatten(1, 2).numpy() + merge_image = (merge_image * 255).round().astype(np.uint8) + merge_image = Image.fromarray(merge_image) + merge_image.save(os.path.join(output_dir, f'{prompt[:10]}-merge.png')) + display.clear_output(wait=True) + display.display(merge_image) + d.load_pil_image_to_datauri(merge_image) + d.chunks[-1].load_pil_image_to_datauri(merge_image) + images = images.cpu().transpose([0, 2, 3, 1]).numpy() + images = (images * 
255).round().astype(np.uint8) + for j in range(images.shape[0]): + image = Image.fromarray(images[j]) + image.save(os.path.join(output_dir, f'{prompt[:10]}-image-{j}.png')) + d.chunks[j].load_pil_image_to_datauri(image) + return da_batches + + @serving + def serving_method(self, text_prompts, **kwargs): + """ + Run as a service. + """ + results = self.generate_image(text_prompts=text_prompts, **kwargs).to_base64() + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + args = self.parser.parse_args(argvs) + results = self.generate_image(text_prompts=args.text_prompts, + style=args.style, + artist=args.artist, + width_height=args.width_height, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + enable_fp16=args.enable_fp16, + seed=args.seed, + display_rate=args.display_rate, + use_gpu=args.use_gpu, + output_dir=args.output_dir) + return results + + def add_module_config_arg(self): + """ + Add the command config options. + """ + + self.arg_input_group.add_argument('--num_inference_steps', + type=int, + default=50, + help="The number of inference steps.") + + self.arg_input_group.add_argument( + '--guidance_scale', + type=float, + default=7.5, + help= + "Increase the adherence to the conditional signal which in this case is text as well as overall sample quality." + ) + + self.arg_input_group.add_argument( + '--seed', + type=int, + default=None, + help= + "Deep in the diffusion code, there is a random number ‘seed’ which is used as the basis for determining the initial state of the diffusion. By default, this is random, but you can also specify your own seed." + ) + + self.arg_input_group.add_argument( + '--display_rate', + type=int, + default=10, + help="During a diffusion run, you can monitor the progress of each image being created with this variable.") + + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=True, + help="whether use GPU or not") + + self.arg_config_group.add_argument('--enable_fp16', + type=ast.literal_eval, + default=False, + help="whether use float16 or not") + + self.arg_config_group.add_argument('--output_dir', + type=str, + default='stable_diffusion_waifu_out', + help='Output directory.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument( + '--text_prompts', + type=str, + help= + 'Phrase, sentence, or string of words and phrases describing what the image should look like. The words will be analyzed by the AI and will guide the diffusion process toward the image(s) you describe. These can include commas and weights to adjust the relative importance of each element. E.g. 
"A beautiful painting of a singular lighthouse, shining its light across a tumultuous sea of blood by greg rutkowski and thomas kinkade, Trending on artstation."Notice that this prompt loosely follows a structure: [subject], [prepositional details], [setting], [meta modifiers and artist]; this is a good starting point for your experiments. Developing text prompts takes practice and experience, and is not the subject of this guide. If you are a beginner to writing text prompts, a good place to start is on a simple AI art app like Night Cafe, starry ai or WOMBO prior to using DD, to get a feel for how text gets translated into images by GAN tools. These other apps use different technologies, but many of the same principles apply.' + ) + self.arg_input_group.add_argument( + '--style', + type=str, + default=None, + help='Image style, such as oil paintings, if specified, style will be used to construct prompts.') + self.arg_input_group.add_argument('--artist', + type=str, + default=None, + help='Artist style, if specified, style will be used to construct prompts.') + + self.arg_input_group.add_argument( + '--width_height', + type=ast.literal_eval, + default=[512, 512], + help= + "Desired final image size, in pixels. You can have a square, wide, or tall image, but each edge length should be set to a multiple of 64px, and a minimum of 512px on the default CLIP model setting. If you forget to use multiples of 64px in your dimensions, DD will adjust the dimensions of your image to make it so." + ) + self.arg_input_group.add_argument( + '--batch_size', + type=int, + default=1, + help="This variable sets the number of still images you want SD to create for each prompt.") diff --git a/modules/image/text_to_image/stable_diffusion_waifu/requirements.txt b/modules/image/text_to_image/stable_diffusion_waifu/requirements.txt new file mode 100644 index 000000000..45e6baa06 --- /dev/null +++ b/modules/image/text_to_image/stable_diffusion_waifu/requirements.txt @@ -0,0 +1,8 @@ +numpy +ftfy +regex +docarray>=0.13.29 +pyyaml +regex +tqdm +ipywidgets From 21545f0ca353cc4bf05798d64964441c532e69e3 Mon Sep 17 00:00:00 2001 From: TrellixVulnTeam <112716341+TrellixVulnTeam@users.noreply.github.com> Date: Mon, 17 Oct 2022 06:47:47 -0500 Subject: [PATCH 111/117] Adding tarfile member sanitization to extractall() (#2061) --- .../ernievil2/transformers/file_utils.py | 21 ++++++++++++++++++- .../MidAutumnPoetry/model/file_utils.py | 21 ++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py index bead1f2c7..0d39f7232 100755 --- a/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py +++ b/modules/image/text_to_image/disco_diffusion_ernievil_base/vit_b_16x/ernievil2/transformers/file_utils.py @@ -47,7 +47,26 @@ def _fetch_from_remote(url, force_download=False, cached_dir='~/.paddle-ernie-ca f.flush() log.debug('extacting... 
to %s' % tmpfile) with tarfile.open(tmpfile.as_posix()) as tf: - tf.extractall(path=str(cached_dir_model)) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tf, path=str(cached_dir_model)) donefile.touch() os.remove(tmpfile.as_posix()) diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py index 608be4efc..1ba17701b 100644 --- a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py +++ b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py @@ -33,7 +33,26 @@ def _fetch_from_remote(url, force_download=False): f.flush() logger.debug('extacting... to %s' % f.name) with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) + def is_within_directory(directory, target): + + abs_directory = os.path.abspath(directory) + abs_target = os.path.abspath(target) + + prefix = os.path.commonprefix([abs_directory, abs_target]) + + return prefix == abs_directory + + def safe_extract(tar, path=".", members=None, *, numeric_owner=False): + + for member in tar.getmembers(): + member_path = os.path.join(path, member.name) + if not is_within_directory(path, member_path): + raise Exception("Attempted Path Traversal in Tar File") + + tar.extractall(path, members, numeric_owner=numeric_owner) + + + safe_extract(tf, path=cached_dir) logger.debug('%s cached in %s' % (url, cached_dir)) return cached_dir From 270cc958d30ae156e9ee03a81d01212050d4c082 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Tue, 18 Oct 2022 10:58:01 +0800 Subject: [PATCH 112/117] add fbcnn_color module (#2065) * add fbcnn_color module * update example * fix save name * fix a cls --- .../enhancement/fbcnn_color/README.md | 166 +++++++ .../enhancement/fbcnn_color/fbcnn.py | 422 ++++++++++++++++++ .../enhancement/fbcnn_color/module.py | 126 ++++++ .../enhancement/fbcnn_color/test.py | 59 +++ 4 files changed, 773 insertions(+) create mode 100644 modules/image/Image_editing/enhancement/fbcnn_color/README.md create mode 100644 modules/image/Image_editing/enhancement/fbcnn_color/fbcnn.py create mode 100644 modules/image/Image_editing/enhancement/fbcnn_color/module.py create mode 100644 modules/image/Image_editing/enhancement/fbcnn_color/test.py diff --git a/modules/image/Image_editing/enhancement/fbcnn_color/README.md b/modules/image/Image_editing/enhancement/fbcnn_color/README.md new file mode 100644 index 000000000..271a52248 --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_color/README.md @@ -0,0 +1,166 @@ +# fbcnn_color + +|模型名称|fbcnn_color| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|FBCNN| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|288MB| +|指标|-| +|最新更新日期|2022-10-08| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +

+
+

+ + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - FBCNN 是一个基于卷积神经网络的 JPEG 图像伪影去除模型,它可以预测可调整的质量因子,以控制伪影重新移动和细节保留之间的权衡。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install fbcnn_color + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run fbcnn_color \ + --input_path "/PATH/TO/IMAGE" \ + --quality_factor -1 \ + --output_dir "fbcnn_color_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="fbcnn_color") + result = module.artifacts_removal( + image=cv2.imread('/PATH/TO/IMAGE'), + quality_factor=None, + visualization=True, + output_dir='fbcnn_color_output' + ) + ``` + + - ### 3、API + + ```python + def artifacts_removal( + image: Union[str, numpy.ndarray], + quality_factor: float = None, + visualization: bool = True, + output_dir: str = "fbcnn_color_output" + ) -> numpy.ndarray + ``` + + - 伪影去除 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * quality_factor (float): 自定义质量因子(0.0 - 1.0),默认 None 为自适应; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (numpy.ndarray): 图像伪影去除结果 (BGR); + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个图像伪影去除的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m fbcnn_color + ``` + + - 这样就完成了一个图像伪影去除服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im) + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/fbcnn_color" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = base64_to_cv2(results) + + # 保存结果 + cv2.imwrite('output.jpg', results) + ``` + +## 五、参考资料 + +* 论文:[Towards Flexible Blind JPEG Artifacts Removal](https://arxiv.org/abs/2109.14573) + +* 官方实现:[jiaxi-jiang/FBCNN](https://github.com/jiaxi-jiang/FBCNN) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install fbcnn_color==1.0.0 + ``` diff --git a/modules/image/Image_editing/enhancement/fbcnn_color/fbcnn.py b/modules/image/Image_editing/enhancement/fbcnn_color/fbcnn.py new file mode 100644 index 000000000..49d2df125 --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_color/fbcnn.py @@ -0,0 +1,422 @@ +from collections import OrderedDict + +import numpy as np +import paddle.nn as nn +''' +# -------------------------------------------- +# Advanced nn.Sequential +# https://github.com/xinntao/BasicSR +# -------------------------------------------- +''' + + +def sequential(*args): + """Advanced nn.Sequential. 
+ Args: + nn.Sequential, nn.Layer + Returns: + nn.Sequential + """ + if len(args) == 1: + if isinstance(args[0], OrderedDict): + raise NotImplementedError('sequential does not support OrderedDict input.') + return args[0] # No sequential is needed. + modules = [] + for module in args: + if isinstance(module, nn.Sequential): + for submodule in module.children(): + modules.append(submodule) + elif isinstance(module, nn.Layer): + modules.append(module) + return nn.Sequential(*modules) + + +# -------------------------------------------- +# return nn.Sequantial of (Conv + BN + ReLU) +# -------------------------------------------- +def conv(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CBR', + negative_slope=0.2): + L = [] + for t in mode: + if t == 'C': + L.append( + nn.Conv2D(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=bias)) + elif t == 'T': + L.append( + nn.Conv2DTranspose(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=bias)) + elif t == 'B': + L.append(nn.BatchNorm2D(out_channels, momentum=0.9, eps=1e-04, affine=True)) + elif t == 'I': + L.append(nn.InstanceNorm2D(out_channels, affine=True)) + elif t == 'R': + L.append(nn.ReLU()) + elif t == 'r': + L.append(nn.ReLU()) + elif t == 'L': + L.append(nn.LeakyReLU(negative_slope=negative_slope)) + elif t == 'l': + L.append(nn.LeakyReLU(negative_slope=negative_slope)) + elif t == '2': + L.append(nn.PixelShuffle(upscale_factor=2)) + elif t == '3': + L.append(nn.PixelShuffle(upscale_factor=3)) + elif t == '4': + L.append(nn.PixelShuffle(upscale_factor=4)) + elif t == 'U': + L.append(nn.Upsample(scale_factor=2, mode='nearest')) + elif t == 'u': + L.append(nn.Upsample(scale_factor=3, mode='nearest')) + elif t == 'v': + L.append(nn.Upsample(scale_factor=4, mode='nearest')) + elif t == 'M': + L.append(nn.MaxPool2D(kernel_size=kernel_size, stride=stride, padding=0)) + elif t == 'A': + L.append(nn.AvgPool2D(kernel_size=kernel_size, stride=stride, padding=0)) + else: + raise NotImplementedError('Undefined type: '.format(t)) + return sequential(*L) + + +# -------------------------------------------- +# Res Block: x + conv(relu(conv(x))) +# -------------------------------------------- +class ResBlock(nn.Layer): + + def __init__(self, + in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CRC', + negative_slope=0.2): + super(ResBlock, self).__init__() + + assert in_channels == out_channels, 'Only support in_channels==out_channels.' + if mode[0] in ['R', 'L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + + def forward(self, x): + res = self.res(x) + return x + res + + +# -------------------------------------------- +# conv + subp (+ relu) +# -------------------------------------------- +def upsample_pixelshuffle(in_channels=64, + out_channels=3, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' 
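Each character of the `mode` string adds one layer ('C' Conv2D, 'T' Conv2DTranspose, 'B' BatchNorm2D, 'I' InstanceNorm2D, 'R'/'L' ReLU/LeakyReLU, '2'/'3'/'4' PixelShuffle, 'M'/'A' pooling), so common blocks become one-liners. A usage sketch, assuming this file is importable as `fbcnn`:

```python
import paddle
from fbcnn import conv   # the factory defined above; the import path is an assumption

# "conv + BN + ReLU" with the default 3x3 kernel, stride 1, padding 1:
block = conv(in_channels=64, out_channels=64, kernel_size=3, mode='CBR')
x = paddle.randn([1, 64, 32, 32])
y = block(x)                     # spatial size preserved: [1, 64, 32, 32]

# the residual branch used by ResBlock is two convs around a ReLU:
res_branch = conv(64, 64, mode='CRC')
```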
+ up1 = conv(in_channels, + out_channels * (int(mode[0])**2), + kernel_size, + stride, + padding, + bias, + mode='C' + mode, + negative_slope=negative_slope) + return up1 + + +# -------------------------------------------- +# nearest_upsample + conv (+ R) +# -------------------------------------------- +def upsample_upconv(in_channels=64, + out_channels=3, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR' + if mode[0] == '2': + uc = 'UC' + elif mode[0] == '3': + uc = 'uC' + elif mode[0] == '4': + uc = 'vC' + mode = mode.replace(mode[0], uc) + up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode=mode, negative_slope=negative_slope) + return up1 + + +# -------------------------------------------- +# convTranspose (+ relu) +# -------------------------------------------- +def upsample_convtranspose(in_channels=64, + out_channels=3, + kernel_size=2, + stride=2, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'T') + up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + return up1 + + +''' +# -------------------------------------------- +# Downsampler +# Kai Zhang, https://github.com/cszn/KAIR +# -------------------------------------------- +# downsample_strideconv +# downsample_maxpool +# downsample_avgpool +# -------------------------------------------- +''' + + +# -------------------------------------------- +# strideconv (+ relu) +# -------------------------------------------- +def downsample_strideconv(in_channels=64, + out_channels=64, + kernel_size=2, + stride=2, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'C') + down1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + return down1 + + +# -------------------------------------------- +# maxpooling + conv (+ relu) +# -------------------------------------------- +def downsample_maxpool(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.' + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'MC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope) + pool_tail = conv(in_channels, + out_channels, + kernel_size, + stride, + padding, + bias, + mode=mode[1:], + negative_slope=negative_slope) + return sequential(pool, pool_tail) + + +# -------------------------------------------- +# averagepooling + conv (+ relu) +# -------------------------------------------- +def downsample_avgpool(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.' 
+ kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'AC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope) + pool_tail = conv(in_channels, + out_channels, + kernel_size, + stride, + padding, + bias, + mode=mode[1:], + negative_slope=negative_slope) + return sequential(pool, pool_tail) + + +class QFAttention(nn.Layer): + + def __init__(self, + in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CRC', + negative_slope=0.2): + super(QFAttention, self).__init__() + + assert in_channels == out_channels, 'Only support in_channels==out_channels.' + if mode[0] in ['R', 'L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + + def forward(self, x, gamma, beta): + gamma = gamma.unsqueeze(-1).unsqueeze(-1) + beta = beta.unsqueeze(-1).unsqueeze(-1) + res = (gamma) * self.res(x) + beta + return x + res + + +class FBCNN(nn.Layer): + + def __init__(self, + in_nc=3, + out_nc=3, + nc=[64, 128, 256, 512], + nb=4, + act_mode='R', + downsample_mode='strideconv', + upsample_mode='convtranspose'): + super(FBCNN, self).__init__() + + self.m_head = conv(in_nc, nc[0], bias=True, mode='C') + self.nb = nb + self.nc = nc + # downsample + if downsample_mode == 'avgpool': + downsample_block = downsample_avgpool + elif downsample_mode == 'maxpool': + downsample_block = downsample_maxpool + elif downsample_mode == 'strideconv': + downsample_block = downsample_strideconv + else: + raise NotImplementedError('downsample mode [{:s}] is not found'.format(downsample_mode)) + + self.m_down1 = sequential(*[ResBlock(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[0], nc[1], bias=True, mode='2')) + self.m_down2 = sequential(*[ResBlock(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[1], nc[2], bias=True, mode='2')) + self.m_down3 = sequential(*[ResBlock(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[2], nc[3], bias=True, mode='2')) + + self.m_body_encoder = sequential( + *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]) + + self.m_body_decoder = sequential( + *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]) + + # upsample + if upsample_mode == 'upconv': + upsample_block = upsample_upconv + elif upsample_mode == 'pixelshuffle': + upsample_block = upsample_pixelshuffle + elif upsample_mode == 'convtranspose': + upsample_block = upsample_convtranspose + else: + raise NotImplementedError('upsample mode [{:s}] is not found'.format(upsample_mode)) + + self.m_up3 = nn.LayerList([ + upsample_block(nc[3], nc[2], bias=True, mode='2'), + *[QFAttention(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_up2 = nn.LayerList([ + upsample_block(nc[2], nc[1], bias=True, mode='2'), + *[QFAttention(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_up1 = nn.LayerList([ + upsample_block(nc[1], nc[0], bias=True, mode='2'), + *[QFAttention(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_tail = conv(nc[0], out_nc, bias=True, mode='C') + + self.qf_pred = sequential(*[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + nn.AdaptiveAvgPool2D((1, 1)), nn.Flatten(), 
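+                                  # quality-factor head: global pooling + MLP ending in a
+                                  # Sigmoid, so the predicted quality factor lies in [0, 1]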
nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 1), nn.Sigmoid()) + + self.qf_embed = sequential(nn.Linear(1, 512), nn.ReLU(), nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 512), + nn.ReLU()) + + self.to_gamma_3 = sequential(nn.Linear(512, nc[2]), nn.Sigmoid()) + self.to_beta_3 = sequential(nn.Linear(512, nc[2]), nn.Tanh()) + self.to_gamma_2 = sequential(nn.Linear(512, nc[1]), nn.Sigmoid()) + self.to_beta_2 = sequential(nn.Linear(512, nc[1]), nn.Tanh()) + self.to_gamma_1 = sequential(nn.Linear(512, nc[0]), nn.Sigmoid()) + self.to_beta_1 = sequential(nn.Linear(512, nc[0]), nn.Tanh()) + + def forward(self, x, qf_input=None): + + h, w = x.shape[-2:] + paddingBottom = int(np.ceil(h / 8) * 8 - h) + paddingRight = int(np.ceil(w / 8) * 8 - w) + x = nn.functional.pad(x, (0, paddingRight, 0, paddingBottom), mode='reflect') + + x1 = self.m_head(x) + x2 = self.m_down1(x1) + x3 = self.m_down2(x2) + x4 = self.m_down3(x3) + x = self.m_body_encoder(x4) + qf = self.qf_pred(x) + x = self.m_body_decoder(x) + qf_embedding = self.qf_embed(qf_input) if qf_input is not None else self.qf_embed(qf) + gamma_3 = self.to_gamma_3(qf_embedding) + beta_3 = self.to_beta_3(qf_embedding) + + gamma_2 = self.to_gamma_2(qf_embedding) + beta_2 = self.to_beta_2(qf_embedding) + + gamma_1 = self.to_gamma_1(qf_embedding) + beta_1 = self.to_beta_1(qf_embedding) + + x = x + x4 + x = self.m_up3[0](x) + for i in range(self.nb): + x = self.m_up3[i + 1](x, gamma_3, beta_3) + + x = x + x3 + + x = self.m_up2[0](x) + for i in range(self.nb): + x = self.m_up2[i + 1](x, gamma_2, beta_2) + x = x + x2 + + x = self.m_up1[0](x) + for i in range(self.nb): + x = self.m_up1[i + 1](x, gamma_1, beta_1) + + x = x + x1 + x = self.m_tail(x) + x = x[..., :h, :w] + + return x, qf diff --git a/modules/image/Image_editing/enhancement/fbcnn_color/module.py b/modules/image/Image_editing/enhancement/fbcnn_color/module.py new file mode 100644 index 000000000..5dd53ff3f --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_color/module.py @@ -0,0 +1,126 @@ +import argparse +import base64 +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn + +from .fbcnn import FBCNN +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name='fbcnn_color', + version='1.0.0', + type="CV/image_editing", + author="", + author_email="", + summary="Flexible JPEG Artifacts Removal.", +) +class FBCNNColor(nn.Layer): + + def __init__(self): + super(FBCNNColor, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, 'ckpts', 'fbcnn_color.pdparams') + self.fbcnn = FBCNN() + state_dict = paddle.load(self.default_pretrained_model_path) + self.fbcnn.set_state_dict(state_dict) + self.fbcnn.eval() + + def preprocess(self, img: np.ndarray) -> np.ndarray: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.transpose((2, 0, 1)) + img = img / 255.0 + return img.astype(np.float32) + + def postprocess(self, img: np.ndarray) -> np.ndarray: + img = img.clip(0, 1) + img = img * 255.0 + img = img.transpose((1, 2, 0)) + img = cv2.cvtColor(img, 
cv2.COLOR_RGB2BGR) + return img.astype(np.uint8) + + def artifacts_removal(self, + image: Union[str, np.ndarray], + quality_factor: float = None, + visualization: bool = True, + output_dir: str = "fbcnn_color_output") -> np.ndarray: + if isinstance(image, str): + _, file_name = os.path.split(image) + save_name, _ = os.path.splitext(file_name) + save_name = save_name + '_' + str(int(time.time())) + '.jpg' + image = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_COLOR) + elif isinstance(image, np.ndarray): + save_name = str(int(time.time())) + '.jpg' + image = image + else: + raise Exception("image should be a str / np.ndarray") + + with paddle.no_grad(): + img_input = self.preprocess(image) + img_input = paddle.to_tensor(img_input[None, ...], dtype=paddle.float32) + if quality_factor and 0 <= quality_factor <= 1: + qf_input = paddle.to_tensor([[quality_factor]], dtype=paddle.float32) + else: + qf_input = None + img_output, _ = self.fbcnn(img_input, qf_input) + img_output = img_output.numpy()[0] + img_output = self.postprocess(img_output) + + if visualization: + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + save_path = os.path.join(output_dir, save_name) + cv2.imwrite(save_path, img_output) + + return img_output + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="Path to image.") + self.parser.add_argument('--quality_factor', type=float, default=None, help="Image quality factor (0.0-1.0).") + self.parser.add_argument('--output_dir', + type=str, + default='fbcnn_color_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.artifacts_removal(image=args.input_path, + quality_factor=args.quality_factor, + visualization=True, + output_dir=args.output_dir) + return 'Artifacts removal results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. + """ + image = base64_to_cv2(image) + img_output = self.artifacts_removal(image=image, **kwargs) + + return cv2_to_base64(img_output) diff --git a/modules/image/Image_editing/enhancement/fbcnn_color/test.py b/modules/image/Image_editing/enhancement/fbcnn_color/test.py new file mode 100644 index 000000000..1853b79f0 --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_color/test.py @@ -0,0 +1,59 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
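+        # cache the downloaded test image under tests/ so the test cases below can read it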
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="fbcnn_color") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('fbcnn_color_output') + + def test_artifacts_removal1(self): + results = self.module.artifacts_removal(image='tests/test.jpg', quality_factor=None, visualization=False) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal2(self): + results = self.module.artifacts_removal(image=cv2.imread('tests/test.jpg'), + quality_factor=None, + visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal3(self): + results = self.module.artifacts_removal(image=cv2.imread('tests/test.jpg'), + quality_factor=0.5, + visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal4(self): + self.assertRaises(Exception, self.module.artifacts_removal, image=['tests/test.jpg']) + + def test_artifacts_removal5(self): + self.assertRaises(FileNotFoundError, self.module.artifacts_removal, image='no.jpg') + + +if __name__ == "__main__": + unittest.main() From 72c7ba612babb80d984a22f268db6b518a2611f8 Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Tue, 18 Oct 2022 11:00:20 +0800 Subject: [PATCH 113/117] add fbcnn_gray module (#2066) * add fbcnn_gray module * fix save name * fix a cls * fix example --- .../enhancement/fbcnn_gray/README.md | 166 +++++++ .../enhancement/fbcnn_gray/fbcnn.py | 422 ++++++++++++++++++ .../enhancement/fbcnn_gray/module.py | 123 +++++ .../enhancement/fbcnn_gray/test.py | 59 +++ 4 files changed, 770 insertions(+) create mode 100644 modules/image/Image_editing/enhancement/fbcnn_gray/README.md create mode 100644 modules/image/Image_editing/enhancement/fbcnn_gray/fbcnn.py create mode 100644 modules/image/Image_editing/enhancement/fbcnn_gray/module.py create mode 100644 modules/image/Image_editing/enhancement/fbcnn_gray/test.py diff --git a/modules/image/Image_editing/enhancement/fbcnn_gray/README.md b/modules/image/Image_editing/enhancement/fbcnn_gray/README.md new file mode 100644 index 000000000..8235893cf --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_gray/README.md @@ -0,0 +1,166 @@ +# fbcnn_gray + +|模型名称|fbcnn_gray| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|FBCNN| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|288MB| +|指标|-| +|最新更新日期|2022-10-08| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +

+
+

+ + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - FBCNN 是一个基于卷积神经网络的 JPEG 图像伪影去除模型,它可以预测可调整的质量因子,以控制伪影重新移动和细节保留之间的权衡。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install fbcnn_gray + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run fbcnn_gray \ + --input_path "/PATH/TO/IMAGE" \ + --quality_factor -1 \ + --output_dir "fbcnn_gray_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="fbcnn_gray") + result = module.artifacts_removal( + image=cv2.imread('/PATH/TO/IMAGE', cv2.IMREAD_GRAYSCALE), + quality_factor=None, + visualization=True, + output_dir='fbcnn_gray_output' + ) + ``` + + - ### 3、API + + ```python + def artifacts_removal( + image: Union[str, numpy.ndarray], + quality_factor: float = None, + visualization: bool = True, + output_dir: str = "fbcnn_gray_output" + ) -> numpy.ndarray + ``` + + - 伪影去除 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W\],GRAY格式; + * quality_factor (float): 自定义质量因子(0.0 - 1.0),默认 None 为自适应; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (numpy.ndarray): 图像伪影去除结果 (GRAY); + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个图像伪影去除的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m fbcnn_gray + ``` + + - 这样就完成了一个图像伪影去除服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im) + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/fbcnn_gray" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = base64_to_cv2(results) + + # 保存结果 + cv2.imwrite('output.jpg', results) + ``` + +## 五、参考资料 + +* 论文:[Towards Flexible Blind JPEG Artifacts Removal](https://arxiv.org/abs/2109.14573) + +* 官方实现:[jiaxi-jiang/FBCNN](https://github.com/jiaxi-jiang/FBCNN) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install fbcnn_gray==1.0.0 + ``` diff --git a/modules/image/Image_editing/enhancement/fbcnn_gray/fbcnn.py b/modules/image/Image_editing/enhancement/fbcnn_gray/fbcnn.py new file mode 100644 index 000000000..49d2df125 --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_gray/fbcnn.py @@ -0,0 +1,422 @@ +from collections import OrderedDict + +import numpy as np +import paddle.nn as nn +''' +# -------------------------------------------- +# Advanced nn.Sequential +# https://github.com/xinntao/BasicSR +# -------------------------------------------- +''' + + +def sequential(*args): + """Advanced nn.Sequential. 
+ Args: + nn.Sequential, nn.Layer + Returns: + nn.Sequential + """ + if len(args) == 1: + if isinstance(args[0], OrderedDict): + raise NotImplementedError('sequential does not support OrderedDict input.') + return args[0] # No sequential is needed. + modules = [] + for module in args: + if isinstance(module, nn.Sequential): + for submodule in module.children(): + modules.append(submodule) + elif isinstance(module, nn.Layer): + modules.append(module) + return nn.Sequential(*modules) + + +# -------------------------------------------- +# return nn.Sequantial of (Conv + BN + ReLU) +# -------------------------------------------- +def conv(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CBR', + negative_slope=0.2): + L = [] + for t in mode: + if t == 'C': + L.append( + nn.Conv2D(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=bias)) + elif t == 'T': + L.append( + nn.Conv2DTranspose(in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=bias)) + elif t == 'B': + L.append(nn.BatchNorm2D(out_channels, momentum=0.9, eps=1e-04, affine=True)) + elif t == 'I': + L.append(nn.InstanceNorm2D(out_channels, affine=True)) + elif t == 'R': + L.append(nn.ReLU()) + elif t == 'r': + L.append(nn.ReLU()) + elif t == 'L': + L.append(nn.LeakyReLU(negative_slope=negative_slope)) + elif t == 'l': + L.append(nn.LeakyReLU(negative_slope=negative_slope)) + elif t == '2': + L.append(nn.PixelShuffle(upscale_factor=2)) + elif t == '3': + L.append(nn.PixelShuffle(upscale_factor=3)) + elif t == '4': + L.append(nn.PixelShuffle(upscale_factor=4)) + elif t == 'U': + L.append(nn.Upsample(scale_factor=2, mode='nearest')) + elif t == 'u': + L.append(nn.Upsample(scale_factor=3, mode='nearest')) + elif t == 'v': + L.append(nn.Upsample(scale_factor=4, mode='nearest')) + elif t == 'M': + L.append(nn.MaxPool2D(kernel_size=kernel_size, stride=stride, padding=0)) + elif t == 'A': + L.append(nn.AvgPool2D(kernel_size=kernel_size, stride=stride, padding=0)) + else: + raise NotImplementedError('Undefined type: '.format(t)) + return sequential(*L) + + +# -------------------------------------------- +# Res Block: x + conv(relu(conv(x))) +# -------------------------------------------- +class ResBlock(nn.Layer): + + def __init__(self, + in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CRC', + negative_slope=0.2): + super(ResBlock, self).__init__() + + assert in_channels == out_channels, 'Only support in_channels==out_channels.' + if mode[0] in ['R', 'L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + + def forward(self, x): + res = self.res(x) + return x + res + + +# -------------------------------------------- +# conv + subp (+ relu) +# -------------------------------------------- +def upsample_pixelshuffle(in_channels=64, + out_channels=3, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' 
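+    # conv() below multiplies the channel count by int(mode[0])**2 so that the
+    # trailing PixelShuffle layer can fold those channels back into H and W.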
+ up1 = conv(in_channels, + out_channels * (int(mode[0])**2), + kernel_size, + stride, + padding, + bias, + mode='C' + mode, + negative_slope=negative_slope) + return up1 + + +# -------------------------------------------- +# nearest_upsample + conv (+ R) +# -------------------------------------------- +def upsample_upconv(in_channels=64, + out_channels=3, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR' + if mode[0] == '2': + uc = 'UC' + elif mode[0] == '3': + uc = 'uC' + elif mode[0] == '4': + uc = 'vC' + mode = mode.replace(mode[0], uc) + up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode=mode, negative_slope=negative_slope) + return up1 + + +# -------------------------------------------- +# convTranspose (+ relu) +# -------------------------------------------- +def upsample_convtranspose(in_channels=64, + out_channels=3, + kernel_size=2, + stride=2, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'T') + up1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + return up1 + + +''' +# -------------------------------------------- +# Downsampler +# Kai Zhang, https://github.com/cszn/KAIR +# -------------------------------------------- +# downsample_strideconv +# downsample_maxpool +# downsample_avgpool +# -------------------------------------------- +''' + + +# -------------------------------------------- +# strideconv (+ relu) +# -------------------------------------------- +def downsample_strideconv(in_channels=64, + out_channels=64, + kernel_size=2, + stride=2, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3', '4'], 'mode examples: 2, 2R, 2BR, 3, ..., 4BR.' + kernel_size = int(mode[0]) + stride = int(mode[0]) + mode = mode.replace(mode[0], 'C') + down1 = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + return down1 + + +# -------------------------------------------- +# maxpooling + conv (+ relu) +# -------------------------------------------- +def downsample_maxpool(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=0, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.' + kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'MC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope) + pool_tail = conv(in_channels, + out_channels, + kernel_size, + stride, + padding, + bias, + mode=mode[1:], + negative_slope=negative_slope) + return sequential(pool, pool_tail) + + +# -------------------------------------------- +# averagepooling + conv (+ relu) +# -------------------------------------------- +def downsample_avgpool(in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='2R', + negative_slope=0.2): + assert len(mode) < 4 and mode[0] in ['2', '3'], 'mode examples: 2, 2R, 2BR, 3, ..., 3BR.' 
+ kernel_size_pool = int(mode[0]) + stride_pool = int(mode[0]) + mode = mode.replace(mode[0], 'AC') + pool = conv(kernel_size=kernel_size_pool, stride=stride_pool, mode=mode[0], negative_slope=negative_slope) + pool_tail = conv(in_channels, + out_channels, + kernel_size, + stride, + padding, + bias, + mode=mode[1:], + negative_slope=negative_slope) + return sequential(pool, pool_tail) + + +class QFAttention(nn.Layer): + + def __init__(self, + in_channels=64, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + bias=True, + mode='CRC', + negative_slope=0.2): + super(QFAttention, self).__init__() + + assert in_channels == out_channels, 'Only support in_channels==out_channels.' + if mode[0] in ['R', 'L']: + mode = mode[0].lower() + mode[1:] + + self.res = conv(in_channels, out_channels, kernel_size, stride, padding, bias, mode, negative_slope) + + def forward(self, x, gamma, beta): + gamma = gamma.unsqueeze(-1).unsqueeze(-1) + beta = beta.unsqueeze(-1).unsqueeze(-1) + res = (gamma) * self.res(x) + beta + return x + res + + +class FBCNN(nn.Layer): + + def __init__(self, + in_nc=3, + out_nc=3, + nc=[64, 128, 256, 512], + nb=4, + act_mode='R', + downsample_mode='strideconv', + upsample_mode='convtranspose'): + super(FBCNN, self).__init__() + + self.m_head = conv(in_nc, nc[0], bias=True, mode='C') + self.nb = nb + self.nc = nc + # downsample + if downsample_mode == 'avgpool': + downsample_block = downsample_avgpool + elif downsample_mode == 'maxpool': + downsample_block = downsample_maxpool + elif downsample_mode == 'strideconv': + downsample_block = downsample_strideconv + else: + raise NotImplementedError('downsample mode [{:s}] is not found'.format(downsample_mode)) + + self.m_down1 = sequential(*[ResBlock(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[0], nc[1], bias=True, mode='2')) + self.m_down2 = sequential(*[ResBlock(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[1], nc[2], bias=True, mode='2')) + self.m_down3 = sequential(*[ResBlock(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + downsample_block(nc[2], nc[3], bias=True, mode='2')) + + self.m_body_encoder = sequential( + *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]) + + self.m_body_decoder = sequential( + *[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)]) + + # upsample + if upsample_mode == 'upconv': + upsample_block = upsample_upconv + elif upsample_mode == 'pixelshuffle': + upsample_block = upsample_pixelshuffle + elif upsample_mode == 'convtranspose': + upsample_block = upsample_convtranspose + else: + raise NotImplementedError('upsample mode [{:s}] is not found'.format(upsample_mode)) + + self.m_up3 = nn.LayerList([ + upsample_block(nc[3], nc[2], bias=True, mode='2'), + *[QFAttention(nc[2], nc[2], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_up2 = nn.LayerList([ + upsample_block(nc[2], nc[1], bias=True, mode='2'), + *[QFAttention(nc[1], nc[1], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_up1 = nn.LayerList([ + upsample_block(nc[1], nc[0], bias=True, mode='2'), + *[QFAttention(nc[0], nc[0], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)] + ]) + + self.m_tail = conv(nc[0], out_nc, bias=True, mode='C') + + self.qf_pred = sequential(*[ResBlock(nc[3], nc[3], bias=True, mode='C' + act_mode + 'C') for _ in range(nb)], + nn.AdaptiveAvgPool2D((1, 1)), nn.Flatten(), 
nn.Linear(512, 512), nn.ReLU(), + nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 1), nn.Sigmoid()) + + self.qf_embed = sequential(nn.Linear(1, 512), nn.ReLU(), nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 512), + nn.ReLU()) + + self.to_gamma_3 = sequential(nn.Linear(512, nc[2]), nn.Sigmoid()) + self.to_beta_3 = sequential(nn.Linear(512, nc[2]), nn.Tanh()) + self.to_gamma_2 = sequential(nn.Linear(512, nc[1]), nn.Sigmoid()) + self.to_beta_2 = sequential(nn.Linear(512, nc[1]), nn.Tanh()) + self.to_gamma_1 = sequential(nn.Linear(512, nc[0]), nn.Sigmoid()) + self.to_beta_1 = sequential(nn.Linear(512, nc[0]), nn.Tanh()) + + def forward(self, x, qf_input=None): + + h, w = x.shape[-2:] + paddingBottom = int(np.ceil(h / 8) * 8 - h) + paddingRight = int(np.ceil(w / 8) * 8 - w) + x = nn.functional.pad(x, (0, paddingRight, 0, paddingBottom), mode='reflect') + + x1 = self.m_head(x) + x2 = self.m_down1(x1) + x3 = self.m_down2(x2) + x4 = self.m_down3(x3) + x = self.m_body_encoder(x4) + qf = self.qf_pred(x) + x = self.m_body_decoder(x) + qf_embedding = self.qf_embed(qf_input) if qf_input is not None else self.qf_embed(qf) + gamma_3 = self.to_gamma_3(qf_embedding) + beta_3 = self.to_beta_3(qf_embedding) + + gamma_2 = self.to_gamma_2(qf_embedding) + beta_2 = self.to_beta_2(qf_embedding) + + gamma_1 = self.to_gamma_1(qf_embedding) + beta_1 = self.to_beta_1(qf_embedding) + + x = x + x4 + x = self.m_up3[0](x) + for i in range(self.nb): + x = self.m_up3[i + 1](x, gamma_3, beta_3) + + x = x + x3 + + x = self.m_up2[0](x) + for i in range(self.nb): + x = self.m_up2[i + 1](x, gamma_2, beta_2) + x = x + x2 + + x = self.m_up1[0](x) + for i in range(self.nb): + x = self.m_up1[i + 1](x, gamma_1, beta_1) + + x = x + x1 + x = self.m_tail(x) + x = x[..., :h, :w] + + return x, qf diff --git a/modules/image/Image_editing/enhancement/fbcnn_gray/module.py b/modules/image/Image_editing/enhancement/fbcnn_gray/module.py new file mode 100644 index 000000000..b06eba461 --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_gray/module.py @@ -0,0 +1,123 @@ +import argparse +import base64 +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn + +from .fbcnn import FBCNN +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_GRAYSCALE) + return data + + +@moduleinfo( + name='fbcnn_gray', + version='1.0.0', + type="CV/image_editing", + author="", + author_email="", + summary="Flexible JPEG Artifacts Removal.", +) +class FBCNNGary(nn.Layer): + + def __init__(self): + super(FBCNNGary, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, 'ckpts', 'fbcnn_gray.pdparams') + self.fbcnn = FBCNN(in_nc=1, out_nc=1) + state_dict = paddle.load(self.default_pretrained_model_path) + self.fbcnn.set_state_dict(state_dict) + self.fbcnn.eval() + + def preprocess(self, img: np.ndarray) -> np.ndarray: + img = img[None, ...] 
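+        # the grayscale input is H x W, so add a leading channel axis to get [1, H, W];
+        # the batch axis is added later in artifacts_removal()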
+ img = img / 255.0 + return img.astype(np.float32) + + def postprocess(self, img: np.ndarray) -> np.ndarray: + img = img.clip(0, 1) + img = img * 255.0 + return img.astype(np.uint8) + + def artifacts_removal(self, + image: Union[str, np.ndarray], + quality_factor: float = None, + visualization: bool = True, + output_dir: str = "fbcnn_gray_output") -> np.ndarray: + if isinstance(image, str): + _, file_name = os.path.split(image) + save_name, _ = os.path.splitext(file_name) + save_name = save_name + '_' + str(int(time.time())) + '.jpg' + image = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_GRAYSCALE) + elif isinstance(image, np.ndarray): + save_name = str(int(time.time())) + '.jpg' + image = image + else: + raise Exception("image should be a str / np.ndarray") + + with paddle.no_grad(): + img_input = self.preprocess(image) + img_input = paddle.to_tensor(img_input[None, ...], dtype=paddle.float32) + if quality_factor and 0 <= quality_factor <= 1: + qf_input = paddle.to_tensor([[quality_factor]], dtype=paddle.float32) + else: + qf_input = None + img_output, _ = self.fbcnn(img_input, qf_input) + img_output = img_output.numpy()[0][0] + img_output = self.postprocess(img_output) + + if visualization: + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + save_path = os.path.join(output_dir, save_name) + cv2.imwrite(save_path, img_output) + + return img_output + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="Path to image.") + self.parser.add_argument('--quality_factor', type=float, default=None, help="Image quality factor (0.0-1.0).") + self.parser.add_argument('--output_dir', + type=str, + default='fbcnn_gray_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.artifacts_removal(image=args.input_path, + quality_factor=args.quality_factor, + visualization=True, + output_dir=args.output_dir) + return 'Artifacts removal results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. + """ + image = base64_to_cv2(image) + img_output = self.artifacts_removal(image=image, **kwargs) + + return cv2_to_base64(img_output) diff --git a/modules/image/Image_editing/enhancement/fbcnn_gray/test.py b/modules/image/Image_editing/enhancement/fbcnn_gray/test.py new file mode 100644 index 000000000..844aa0b1b --- /dev/null +++ b/modules/image/Image_editing/enhancement/fbcnn_gray/test.py @@ -0,0 +1,59 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' 
+ with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + cls.module = hub.Module(name="fbcnn_gray") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('fbcnn_gray_output') + + def test_artifacts_removal1(self): + results = self.module.artifacts_removal(image='tests/test.jpg', quality_factor=None, visualization=False) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal2(self): + results = self.module.artifacts_removal(image=cv2.imread('tests/test.jpg', cv2.IMREAD_GRAYSCALE), + quality_factor=None, + visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal3(self): + results = self.module.artifacts_removal(image=cv2.imread('tests/test.jpg', cv2.IMREAD_GRAYSCALE), + quality_factor=0.5, + visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_artifacts_removal4(self): + self.assertRaises(Exception, self.module.artifacts_removal, image=['tests/test.jpg']) + + def test_artifacts_removal5(self): + self.assertRaises(FileNotFoundError, self.module.artifacts_removal, image='no.jpg') + + +if __name__ == "__main__": + unittest.main() From 840150a5e275481cc4652c95bc63624f6d177dcc Mon Sep 17 00:00:00 2001 From: chenjian Date: Thu, 20 Oct 2022 20:41:17 +0800 Subject: [PATCH 114/117] update ernie_vilg readme (#2082) --- .../image/text_to_image/ernie_vilg/README.md | 688 ++++-------------- 1 file changed, 141 insertions(+), 547 deletions(-) diff --git a/modules/image/text_to_image/ernie_vilg/README.md b/modules/image/text_to_image/ernie_vilg/README.md index 36b33d613..0d1a064c5 100755 --- a/modules/image/text_to_image/ernie_vilg/README.md +++ b/modules/image/text_to_image/ernie_vilg/README.md @@ -102,639 +102,233 @@ ## 四、 Prompt 指南 +作者:佳祥 (LCL-Brew) & 单斌 +### Prompt公式 -这是一份如何调整 Prompt 得到更漂亮的图片的经验性文档。我们的结果和经验都来源于[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 和[社区的资料](#related-work)。 +「公式」= 图片主体,细节词,修饰词 +细节词可以任意组合,修饰词可以限定一种风格,也可以限定多种风格,遵循的基本原则是符合正常的中文语法逻辑即可。 -什么是 Prompt?Prompt 是输入到 Demo 中的文字,可以是一个实体,例如猫;也可以是一串富含想象力的文字,例如:『夕阳日落时,天边有巨大的云朵,海面波涛汹涌,风景,胶片感』。不同的 Prompt 对于生成的图像质量影响非常大。所以也就有了下面所有的 Prompt 的一些经验性技巧。 +### 示例 -| ![174_蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术_000-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/174_蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术_000-1.jpg) | -| :----------------------------------------------------------: | -| 蒙娜丽莎,赛博朋克,宝丽来,33毫米,蒸汽波艺术 | +|drawing| +| --- | +| prompt:蒙娜丽莎,赛博朋克,宝丽来,33毫米,
蒸汽波艺术 | +|drawing| +| --- | +| prompt:火焰,凤凰,少女,未来感,高清,3d,
精致面容,cg感,古风,唯美,毛发细致,上半身立绘 | -## 前言 +|drawing| +| --- | +| prompt:巨狼,飘雪,蓝色大片烟雾,毛发细致,
烟雾缭绕,高清,3d,cg感,侧面照 | -Prompt 的重要性如此重要,以至于我们需要构造一个示例来进行一次说明。 -如下图,[文心 ERNIE-ViLG Demo](https://wenxin.baidu.com/moduleApi/ernieVilg) 中,『卡通』模式下,输入的 Prompt 为『橘猫』,以及 『卡通』模型式下『极乐迪斯科里的猫, 故障艺术』两个示例,能够看出来后者的细节更多,呈现的图片也更加的风格化。 +| drawing | +| --- | +|
prompt:浮世绘日本科幻哑光绘画,概念艺术,
动漫风格神道寺禅园英雄动作序列,包豪斯
| -开放风格限制(本质上就是在 Prompt 中不加入风格控制词),即下图图3,得到的图片细节更多、也更加真实,同时还保留了比较强烈的风格元素。所以后面的所有内容,都将围绕着如何构造更好的 Prompt 进行资料的整理。 +### 修饰词 -| ![橘猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫0.jpg) | ![极乐迪斯科里的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫1.jpg) | ![极乐迪斯科里的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/极乐猫3.jpg) | -| :----------------------------------------------------------: | :----------------------------------------------------------: | ------------------------------------------------------------ | -| “橘猫”(卡通) | “极乐迪斯科里的猫, 故障艺术”(卡通) | “极乐迪斯科里的猫, 故障艺术” (探索无限) | +好的修饰词可以让图片生成的效果更好,基于产业级知识增强的文心大模型,用户可以通过输入独特且特征明显的修饰词,来达到更高质量的图片生成。 -| ![cat-hd](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/cat-hd.jpg) | -| :----------------------------: | -| 极乐迪斯科里的猫,故障艺术 | +#### 1. 效果参考 +drawing +**cg感** +| drawing |drawing | +| --- | --- | -## 呼吁与准则 -机器生成图片的最终目的还是便捷地为人类创造美的作品。而技术不是十全十美的,不能保证每次生成的图像都能够尽善尽美。因此呼吁所有相关玩家,如果想分享作品,那就分享那些美感爆棚的作品! +| drawing|drawing| +| --- | --- | -算法生成的图片难免会受到数据的影响,从而导致生成的图片是有数据偏见的。因此在分享机器生成图片到社交媒体之前,请三思当前的图片是不是含有:令人不适的、暴力的、色情的内容。如果有以上的内容请自行承担法律后果。 +**厚涂风格 / 厚涂版绘** - -## Prompt 的设计 -如何设计 Prompt,下文大概会通过4个方面来说明:[Prompt 公式](#p-eq),[Prompt 原则](#p-principle),[Prompt 主体](#p-entity)、[Prompt 修饰词](#p-modifier)。 -需要注意的是,这里的 Prompt 公式仅仅是个入门级别的参考,是经验的简单总结,在熟悉了 Prompt 的原理之后,可以尽情的发挥脑洞修改 Prompt。 +| drawing |drawing | +| --- | --- | +| drawing|drawing| +| --- | --- | +**古风** +| drawing |drawing | +| --- | --- | - -## Prompt 公式 -$$ -Prompt = [形容词] [主语] ,[细节设定], [修饰语或者艺术家] -$$ +| drawing|drawing| +| --- | --- | -按照这个公式,我们首先构造一个形容词加主语的案例。 这里我构造的是 戴着眼镜的猫, 风格我选择的是油画风格,然后我再添加一些细节设定,这里我给的是 漂浮在宇宙中, 可以看到 ,猫猫的后面出现了很多天体。 -| ![猫1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫1.jpg) | ![猫2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫2.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/猫3.jpg) | -| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | -| “戴着眼镜的猫”(油画) | “戴着眼镜的猫,漂浮在宇宙中”(油画) | “戴着眼镜的猫,漂浮在宇宙中,高更风格”(油画) | +**精致面容** -最后我们想让我们的照片风格更加有艺术性的效果, 我们选择的艺术家是高更, 可以看到图像的画风有了更强的艺术风格。 +| drawing |drawing | +| --- | --- | +| drawing|drawing| +| --- | --- | - -## Prompt 设计原则 +**穆夏 / 穆夏风格** -### Prompt 简单原则: 清楚地陈述 +| drawing |drawing | +| --- | --- | -除了公式之外,也有一些简单的 Prompt设计原则分享给大家:即**清楚的陈述**。 +**机械感 / 机械** -例如我们如果是简单的输入风景的话,往往模型不知道我们想要的风景是什么样子的(下图1)。我们要去尽量的幻想风景的样子,然后变成语言描述。 例如我想像的是日落时,海边的风景, 那我就构造了 Prompt 『夕阳日落时,阳光落在云层上,海面波光粼粼,风景』(下图2)。 进一步的,我想风格化我的图像,所以我在结尾的部分,增加了『胶片感』来让图片的色彩更加好看一些(下图3)。但是云彩的细节丢失了一些,进一步的我再增加天边巨大云朵这一个细节,让我的图片朝着我想要的样子靠的更进一步(下图4)。 +| drawing |drawing | +| --- | --- | -| ![猫1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景1.jpg) | ![猫2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景2.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景3.jpg) | ![猫3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/风景4.jpg) | -| :------------------------: | :----------------------------------------------: | :------------------------------------------------------: | -------------------------------------------------------- | -| “风景” | 
“夕阳日落时,阳光落在云层上,海面波光粼粼,风景” | “夕阳日落时,阳光落在云层上,海面波涛汹涌,风景,胶片感” | 夕阳日落时,天边有巨大的云朵,海面波涛汹涌,风景,胶片感 | +**宫崎骏动画** +| drawing |drawing | +| --- | --- | - -## Prompt 主体的选择 +**烟雾 / 烟雾缭绕** -Prompt 的主体可以是千奇百怪、各种各样的。这里我挑了几个简单的容易出效果的主体示例和一些能够营造特殊氛围的氛围词来激发大家的灵感。 +| drawing |drawing | +| --- | --- | +**皮克斯动画** +| drawing |drawing | +| --- | --- | -| ![宇航员](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/宇航员.jpg) | ![孤岛](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/孤岛.jpg) | ![白色城堡](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/白色城堡.jpg) | ![机器人](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/机器人.jpg) | -| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | -| 宇航员 | 孤岛 | 白色城堡 | 机器人 | -| ![巫师](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/巫师.jpg) | ![罗马城](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/罗马城.jpg) | ![海鸥](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/海鸥.jpg) | ![气球](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/气球.jpg) | -| 巫师 | 罗马城 | 海鸥 | 气球 | +**拟人化** +| drawing |drawing | +| --- | --- | +**剪纸叠加风格** +| drawing |drawing | +| --- | --- | +**色彩斑斓** -| ![霓虹灯](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/霓虹灯.jpg) | ![烟](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/烟.jpg) | ![漩涡](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/漩涡.jpg) | -| :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | -| …日落,霓虹灯…薄雾 | …烟… | …燃烧漩涡, …烟雾和碎片 | -| ![废墟](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/废墟.jpg) | ![光之](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/光之.jpg) | ![巨大的](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/巨大的.jpg) | -| …废墟… | 光之… | 巨大的… | +| drawing |drawing | +| --- | --- | +**城市印象 & 圆形轮廓** +| drawing |drawing | +| --- | --- | - -## Prompt 修饰词 +**上半身立绘 / 人物立绘** -如果想让生成的图片更加的艺术化、风格话,可以考虑在 Prompt 中添加艺术修饰词。艺术修饰词可以是一些美术风格(例如表现主义、抽象主义等),也可以是一些美学词汇(蒸汽波艺术、故障艺术等),也可以是一些摄影术语(80mm摄像头、浅景深等),也可以是一些绘图软件(虚幻引擎、C4D等)。 +| drawing |drawing | +| --- | --- | -按照这样的规律,我们在两个输入基准上 : +**电影质感** -> 一只猫坐在椅子上,戴着一副墨镜 -> -> 日落时的城市天际线 -> +| drawing |drawing | +| --- | --- | -通过构造『输入 + Prompt 修饰词』来展示不同修饰词的效果 (这里的策略参考了[资料](https://docs.google.com/document/d/11WlzjBT0xRpQhP9tFMtxzd0q6ANIdHPUBkMV-YB043U/edit))。 +**扁平化设计 / 扁平化** -需要注意的是,不是所有的 Prompt 对于所有的修饰词都会发生反应。所以查阅 Prompt 修饰词的过程中,会发现部分的 Prompt 修饰词只能对两个基准中的一个生效。这是很正常的,因为 Prompt 的调优是一个反复的试错的过程。接下来,大家结合如下的 Prompt 修饰词, Happy Prompting 吧! 
+| drawing |drawing | +| --- | --- | +**logo设计 / 简约logo设计** +| drawing |drawing | +| --- | --- | -### 复古未来主义风格 +**细节清晰** -| ![00472_000_一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00472_000_一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格.jpg) | ![00472_000_日落时的城市天际线,复古未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00472_000_日落时的城市天际线,复古未来主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,复古未来主义风格 | 日落时的城市天际线,复古未来主义风格 | +| drawing |drawing | +| --- | --- | +**毛发细致** +| drawing |drawing | +| --- | --- | -### 粉彩朋克风格 -| ![00017_004_一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_004_一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格.jpg) | ![00029_001_日落时的城市天际线,粉彩朋克风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_001_日落时的城市天际线,粉彩朋克风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,粉彩朋克风格 | 日落时的城市天际线,粉彩朋克风格 | -### 史前遗迹风格 -| ![00443_005_一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00443_005_一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格.jpg) | ![00443_005_日落时的城市天际线,史前遗迹风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00443_005_日落时的城市天际线,史前遗迹风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,史前遗迹风格 | 日落时的城市天际线,史前遗迹风格 | +#### 2. 风格词参考 +复古未来主义风格 ->- 海滩兔风格 ->- 抽象技术风格 ->- 酸性精灵风格 ->- 古埃及风格 ->- 风帽风格 ->- 装饰艺术风格 ->- 极光风格 ->- 秋天风格 ->- 巴洛克风格 ->- 摩托车手风格 ->- 碎核风格 ->- 纸箱风格 ->- 未来主义风格 ->- 孟菲斯公司风格 ->- 立体主义风格 ->-赛博朋克风格 ->- 黑暗自然主义风格 ->- 表现主义风格 ->- 野兽派风格 ->- 鬼魂风格 ->- 嘻哈风格 ->- 嬉皮士风格 ->- 幻象之城风格 ->- 印象主义风格 ->- 卡瓦伊风格 ->- 美人鱼风格 ->- 极简主义风格 ->- 水井惠郎风格 ->- 苔藓风格 ->- 新浪潮风格 ->- 迷宫物语风格 ->- 仙女风格 ->- 粉彩朋克风格 ->- 照片写实风格 ->- 粉红公主风格 ->- 海盗风格 ->- 像素可爱风格 ->- 波普艺术风格 ->- 史前遗迹风格 ->- 迷幻风格 ->- 雨天风格 ->- 湿漉漉的风格 ->- 浮世绘风格 ->- 矢量心风格 ->- 维京人风格 ->- 女巫店风格 ->- 后印象主义 ->- 素人主义 -### 波普艺术风格 +#### 3. 艺术词参考 -| ![00434_005_一只猫坐在椅子上,戴着一副墨镜,波普艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00434_005_一只猫坐在椅子上,戴着一副墨镜,波普艺术风格.jpg) | ![00434_002_日落时的城市天际线,波普艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00434_002_日落时的城市天际线,波普艺术风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,波普艺术风格 | 日落时的城市天际线,后世界末日风格 | +|                   艺术类型                   |                   艺术家                   |                   常用艺术风格                   | +| --- | --- | --- | +|
| 肖像画 | 文森特·梵高 | 印象主义 |
| 风景画 | 尼古拉斯·罗伊里奇 | 现实主义 |
| 风俗画 | 皮埃尔-奥古斯特·雷诺阿 | 浪漫主义 |
| 宗教绘画 | 克劳德·莫内 | 表现主义 |
| 抽象画 | 彼得·孔查洛夫斯基 | 后印象主义 |
| 都市风景画 | 卡米尔·毕沙罗 | 象征主义 |
| 素描与草图 | 约翰·辛格·萨金特 | 新艺术主义 |
| 静物 | 伦勃朗 | 巴洛克风格 |
| 裸体画 | 马克·夏加尔 | 抽象表现主义 |
| 插画 | 巴勃罗·毕加索 | 北欧文艺复兴 |
|  | 古斯塔夫·多雷 | 素人艺术,原始主义 |
|  | 阿尔布雷特·丢勒 | 立体主义 |
|  | 鲍里斯·库斯妥基耶夫 | 洛可可 |
|  | 埃德加·德加 | 色域绘画 |
|  |  | 波普艺术 |
|  |  | 文艺复兴开端 |
|  |  | 文艺复兴全盛期 |
|  |  | 极简主义 |
|  |  | 矫饰主义,文艺复兴晚期 |

#### 4. 摄影词参考

| 可以加入到Prompt 中的摄影词 |  |
| --- | --- |
| 浅景深 | 仰拍 |
| 负像 | 动态模糊 |
| 微距 | 高反差 |
| 双色版 | 中心构图 |
| 角度 | 逆光 |
| 三分法 | 长曝光 |
| 抓拍 | 禅宗摄影 |
| 软焦点 | 抽象微距镜头 |
| 黑白 | 暗色调 |
| 无镜反射 | 长时间曝光 |
| 双色调 | 框架,取景 |
颗粒图像|| -### 纸箱风格 +### 技巧提示 -| ![00081_000_一只猫坐在椅子上,戴着一副墨镜,纸箱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00081_000_一只猫坐在椅子上,戴着一副墨镜,纸箱风格.jpg) | ![00081_000_日落时的城市天际线,纸箱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00081_000_日落时的城市天际线,纸箱风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,纸箱风格 | 日落时的城市天际线,纸箱风格 | +1. 【作图规则】Prompt构建是文本符合逻辑的组合,有序且丰富的描述可以不断提升画面效果 +2. 【新手入门】不知如何输入Prompt?点击示例,体验文生图的魅力,参考教程,逐步进阶~ +3. 【风格生成】试试添加 “国潮”、“国风”等,感受中国风的魅力 +4. 【风格生成】试试混合两种代表性的风格,例如“赛博朋克,扁平化设计”、”皮克斯动画,赛博朋克” +5. 【人像生成】添加“仙鹤、月亮、楼阁、小屋、街道、玫瑰、机械”,画面会更饱满 +6. 【人像生成】添加“精致面容、唯美、cg感、细节清晰“等,人物刻画会更细致 +7. 【风格生成】添加“扁平化风格,logo”等,可以设计出各类图标等,例如 “猫猫头像,扁平化风格” +8. 【风格生成】指定颜色,或添加“烟雾缭绕”、“火焰”、“烟尘”、“花瓣”,生成画面的氛围感更饱满 +9. 【创意生成】发挥想象力,例如:“中西混搭”、“泰迪熊唱京剧”、“米老鼠吃火锅” +10. 【风格生成】“水彩”,“水墨”与古诗组合,画面意境会有提升~ +11. 【风格生成】想要日系头像和拟人化动物?试试关键词“日系手绘”、“治愈风” +12. 【风格生成】添加“pixiv”,生成二次元或者动漫的画质更惊艳 -### 未来主义风格 +### 呼吁与准则 -| ![00083_000_一只猫坐在椅子上,戴着一副墨镜,未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00083_000_一只猫坐在椅子上,戴着一副墨镜,未来主义风格.jpg) | ![00083_002_日落时的城市天际线,未来主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00083_002_日落时的城市天际线,未来主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,未来主义风格 | 一只猫坐在椅子上,戴着一副墨镜,未来主义风格 | - - - -### 抽象技术风格 - -| ![00000_003_一只猫坐在椅子上,戴着一副墨镜, 抽象技术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00000_003_一只猫坐在椅子上,戴着一副墨镜,抽象技术风格.jpg) | ![00000_004_日落时的城市天际线,抽象技术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00000_004_日落时的城市天际线,抽象技术风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,抽象技术风格 | 日落时的城市天际线,抽象技术风格 | - - - - -### 海滩兔风格 - - -| ![00049_001_一只猫坐在椅子上,戴着一副墨镜,海滩兔风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00049_001_一只猫坐在椅子上,戴着一副墨镜,海滩兔风格.jpg) | ![00049_003_日落时的城市天际线,海滩兔风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00049_003_日落时的城市天际线,海滩兔风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,海滩兔风格 | 日落时的城市天际线,海滩兔风格 | - - -### 粉红公主风格 - -| ![00038_004_一只猫坐在椅子上,戴着一副墨镜,粉红公主风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00038_004_一只猫坐在椅子上,戴着一副墨镜,粉红公主风格.jpg) | ![00046_004_日落时的城市天际线,粉红公主风格-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_004_日落时的城市天际线,粉红公主风格-1.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,粉红公主风格 | 日落时的城市天际线,粉红公主风格 | - - -### 嬉皮士风格 - -| ![00275_002_一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00275_002_一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格.jpg) | 
![00275_001_日落时的城市天际线,嬉皮士风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00275_001_日落时的城市天际线,嬉皮士风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,嬉皮士风格 | 日落时的城市天际线,嬉皮士风格 | - -### 幻象之城风格 - -| ![00288_000_一只猫坐在椅子上,戴着一副墨镜,幻象之城风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00288_000_一只猫坐在椅子上,戴着一副墨镜,幻象之城风格.jpg) | ![00288_004_日落时的城市天际线,幻象之城风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00288_004_日落时的城市天际线,幻象之城风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,幻象之城风格 | 日落时的城市天际线,幻象之城风格 | - - -### 美人鱼风格 - -| ![00351_002_一只猫坐在椅子上,戴着一副墨镜,美人鱼风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00351_002_一只猫坐在椅子上,戴着一副墨镜,美人鱼风格.jpg) | ![00351_000_日落时的城市天际线,美人鱼风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00351_000_日落时的城市天际线,美人鱼风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,美人鱼风格 | 日落时的城市天际线,美人鱼风格 | - - -### 迷宫物语风格 - - -| ![00382_005_一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00382_005_一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格.jpg) | ![00382_000_日落时的城市天际线,迷宫物语风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00382_000_日落时的城市天际线,迷宫物语风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,迷宫物语风格 | 日落时的城市天际线,迷宫物语风格 | - -### 仙女风格 - - -| ![00397_003_一只猫坐在椅子上,戴着一副墨镜,仙女风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00397_003_一只猫坐在椅子上,戴着一副墨镜,仙女风格.jpg) | ![00397_004_日落时的城市天际线,仙女风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00397_004_日落时的城市天际线,仙女风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,仙女风格 | 日落时的城市天际线,仙女风格 | - - - - - -### Low Poly 风格 - -| ![猫low-poly风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/猫low-poly风格.jpg) | ![sky-line-low-poly](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/sky-line-low-poly.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, low poly 风格 | 日落时的城市天际线, low-poly | - - - - -### 浮世绘风格 - -| ![00564_001_一只猫坐在椅子上,戴着一副墨镜,浮世绘风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00564_001_一只猫坐在椅子上,戴着一副墨镜,浮世绘风格.jpg) | ![00564_002_日落时的城市天际线,浮世绘风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00564_002_日落时的城市天际线,浮世绘风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,浮世绘风格 | 日落时的城市天际线,浮世绘风格 | - -### 矢量心风格 - -| 
![00573_001_一只猫坐在椅子上,戴着一副墨镜,矢量心风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00573_001_一只猫坐在椅子上,戴着一副墨镜,矢量心风格.jpg) | ![00573_005_日落时的城市天际线,矢量心风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00573_005_日落时的城市天际线,矢量心风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,矢量心风格 | 日落时的城市天际线,矢量心风格 | - - -### 摩托车手风格 - - -| ![00051_000_一只猫坐在椅子上,戴着一副墨镜,摩托车手风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00051_000_一只猫坐在椅子上,戴着一副墨镜,摩托车手风格.jpg) | ![日落时的城市天际线,摩托车手风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/日落时的城市天际线,摩托车手风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,摩托车手风格 | 日落时的城市天际线,摩托车手风格 | - - - -### 孟菲斯公司风格 - - -| ![00114_001_一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00114_001_一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格.jpg) | ![00114_002_日落时的城市天际线,孟菲斯公司风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00114_002_日落时的城市天际线,孟菲斯公司风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,孟菲斯公司风格 | 日落时的城市天际线,孟菲斯公司风格 | - - -### 泥塑风格 - - -| ![一只猫坐在椅子上,戴着一副墨镜, 泥塑风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/一只猫坐在椅子上戴着一副墨镜泥塑风格.jpg) | ![00013_002_日落时的城市天际线, 泥塑](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00013_002_日落时的城市天际线,泥塑.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, 泥塑风格 | 日落时的城市天际线, 泥塑风格 | - - - - -### 苔藓风格 - -| ![00006_001_一只猫坐在椅子上,戴着一副墨镜,苔藓风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00006_001_一只猫坐在椅子上,戴着一副墨镜,苔藓风格.jpg) | ![00004_004_日落时的城市天际线,苔藓风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00004_004_日落时的城市天际线,苔藓风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,苔藓风格 | 日落时的城市天际线,苔藓风格 | - - - -### 新浪潮风格 - -| ![00389_000_一只猫坐在椅子上,戴着一副墨镜,新浪潮风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00389_000_一只猫坐在椅子上,戴着一副墨镜,新浪潮风格.jpg) | ![00389_005_日落时的城市天际线,新浪潮风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00389_005_日落时的城市天际线,新浪潮风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,新浪潮风格 | 日落时的城市天际线,新浪潮风格 | - -### 嘻哈风格 - -| ![00274_000_一只猫坐在椅子上,戴着一副墨镜,嘻哈风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00274_000_一只猫坐在椅子上,戴着一副墨镜,嘻哈风格.jpg) | ![00274_005_日落时的城市天际线,嘻哈风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00274_005_日落时的城市天际线,嘻哈风格.jpg) | -| ------------------------------------------------------------ | 
------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,嘻哈风格 | 日落时的城市天际线,嘻哈风格 | - -### 矢量图 - -| ![00177_001_一只猫坐在椅子上,戴着一副墨镜, 矢量图](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00177_001_一只猫坐在椅子上戴着一副墨镜矢量图.jpg) | ![00020_002_日落时的城市天际线, 矢量图](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00020_002_日落时的城市天际线矢量图.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, 矢量图 | 日落时的城市天际线, 矢量图 | - -### 铅笔艺术 - - -| ![00203_000_一只猫坐在椅子上,戴着一副墨镜, 铅笔艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00203_000_一只猫坐在椅子上戴着一副墨镜铅笔艺术.jpg) | ![00053_000_日落时的城市天际线, 铅笔艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00053_000_日落时的城市天际线铅笔艺术.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, 铅笔艺术 | 日落时的城市天际线, 铅笔艺术 | - - -### 女巫店风格 - -| ![00606_001_一只猫坐在椅子上,戴着一副墨镜,女巫店风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00606_001_一只猫坐在椅子上,戴着一副墨镜,女巫店风格.jpg) | ![00606_000_日落时的城市天际线,女巫店风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00606_000_日落时的城市天际线,女巫店风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,女巫店风格 | 日落时的城市天际线,女巫店风格 | - - - -### 4D 建模 - - -| ![00230_000_一只猫坐在椅子上,戴着一副墨镜, 4D 建模](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00230_000_一只猫坐在椅子上戴着一副墨镜4D建模.jpg) | ![00082_001_日落时的城市天际线, 4D 建模](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00082_001_日落时的城市天际线4D建模.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, 4D 建模 | 日落时的城市天际线, 4D 建模 | - - - -### 水彩墨风格 - - -| ![00280_004_一只猫坐在椅子上,戴着一副墨镜, 水彩墨风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00280_004_一只猫坐在椅子上,戴着一副墨镜,水彩墨风格.jpg) | ![00130_004_日落时的城市天际线, 水彩墨风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00130_004_日落时的城市天际线,水彩墨风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜, 水彩墨风格 | 日落时的城市天际线, 水彩墨风格 | - - - -### 酸性精灵风格 - -| ![00001_004_一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00001_004_一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格.jpg) | ![00001_004_日落时的城市天际线,酸性精灵风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00001_004_日落时的城市天际线,酸性精灵风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,酸性精灵风格 | 日落时的城市天际线,酸性精灵风格 | - - -### 海盗风格 - -| ![00427_002_一只猫坐在椅子上,戴着一副墨镜,海盗风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00427_002_一只猫坐在椅子上,戴着一副墨镜,海盗风格.jpg) | 
![00427_000_日落时的城市天际线,海盗风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00427_000_日落时的城市天际线,海盗风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 日落时的城市天际线,海盗风格 | 一只猫坐在椅子上,戴着一副墨镜,海盗风格 | - - - -### 古埃及风格 - - -| ![00017_005_一只猫坐在椅子上,戴着一副墨镜,古埃及风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_005_一只猫坐在椅子上,戴着一副墨镜,古埃及风格.jpg) | ![00017_003_日落时的城市天际线,古埃及风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00017_003_日落时的城市天际线,古埃及风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,古埃及风格 | 日落时的城市天际线,古埃及风格 | - -### 风帽风格 - - -| ![戴着帽子的猫](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/戴着帽子的猫.jpg) | ![戴着帽子的城市](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/戴着帽子的城市.jpg) | -| --------------------------------------------------------- | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,风帽风格 | 日落时的城市天际线,风帽风格 | - -### 装饰艺术风格 - - -| ![00029_000_一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_000_一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格.jpg) | ![00029_005_日落时的城市天际线,装饰艺术风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00029_005_日落时的城市天际线,装饰艺术风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,装饰艺术风格 | 日落时的城市天际线,装饰艺术风格 | - -### 极光风格 - - -| ![00035_004_一只猫坐在椅子上,戴着一副墨镜,极光风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00035_004_一只猫坐在椅子上,戴着一副墨镜,极光风格.jpg) | ![00035_003_日落时的城市天际线,极光风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00035_003_日落时的城市天际线,极光风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,极光风格 | 日落时的城市天际线,极光风格 | - -### 秋天风格 - - -| ![00036_005_一只猫坐在椅子上,戴着一副墨镜,秋天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00036_005_一只猫坐在椅子上,戴着一副墨镜,秋天风格.jpg) | ![00036_003_日落时的城市天际线,秋天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00036_003_日落时的城市天际线,秋天风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 日落时的城市天际线,秋天风格 | 一只猫坐在椅子上,戴着一副墨镜,秋天风格 | - -### 巴洛克风格 - - -| ![00046_002_一只猫坐在椅子上,戴着一副墨镜,巴洛克风格风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_002_一只猫坐在椅子上,戴着一副墨镜,巴洛克风格风格.jpg) | ![00046_003_日落时的城市天际线,巴洛克风格风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00046_003_日落时的城市天际线,巴洛克风格风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,巴洛克风格 | 日落时的城市天际线,巴洛克风格 | - -### 立体主义风格 - -| 
![00128_002_一只猫坐在椅子上,戴着一副墨镜,立体主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00128_002_一只猫坐在椅子上,戴着一副墨镜,立体主义风格.jpg) | ![00128_004_日落时的城市天际线,立体主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00128_004_日落时的城市天际线,立体主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,立体主义风格 | 日落时的城市天际线,立体主义风格 | - - -### 黑暗自然主义风格 - -| ![00147_002_一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00147_002_一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格.jpg) | ![00147_004_日落时的城市天际线,黑暗自然主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00147_004_日落时的城市天际线,黑暗自然主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,黑暗自然主义风格 | 日落时的城市天际线,黑暗自然主义风格 | - -### 表现主义风格 - -| ![00190_001_一只猫坐在椅子上,戴着一副墨镜,表现主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00190_001_一只猫坐在椅子上,戴着一副墨镜,表现主义风格.jpg) | ![00190_000_日落时的城市天际线,表现主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00190_000_日落时的城市天际线,表现主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,表现主义风格 | 日落时的城市天际线,表现主义风格 | - -### 野兽派风格 - -| ![00200_000_一只猫坐在椅子上,戴着一副墨镜,野兽派风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00200_000_一只猫坐在椅子上,戴着一副墨镜,野兽派风格.jpg) | ![00200_002_日落时的城市天际线,野兽派风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00200_002_日落时的城市天际线,野兽派风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,野兽派风格 | 日落时的城市天际线,野兽派风格 | - -### 鬼魂风格 - -| ![00226_001_一只猫坐在椅子上,戴着一副墨镜,鬼魂风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00226_001_一只猫坐在椅子上,戴着一副墨镜,鬼魂风格.jpg) | ![00226_002_日落时的城市天际线,鬼魂风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00226_002_日落时的城市天际线,鬼魂风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,鬼魂风格 | 日落时的城市天际线,鬼魂风格 | - -### 印象主义风格 - -| ![00289_000_一只猫坐在椅子上,戴着一副墨镜,印象主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00289_000_一只猫坐在椅子上,戴着一副墨镜,印象主义风格.jpg) | ![00289_001_日落时的城市天际线,印象主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00289_001_日落时的城市天际线,印象主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,印象主义风格 | 日落时的城市天际线,印象主义风格 | - -### 卡瓦伊风格 - -| ![00305_001_一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00305_001_一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格.jpg) | ![00305_000_日落时的城市天际线,卡瓦伊风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00305_000_日落时的城市天际线,卡瓦伊风格.jpg) | -| 
------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,卡瓦伊风格 | 日落时的城市天际线,卡瓦伊风格 | - -### 极简主义风格 - -| ![00362_004_一只猫坐在椅子上,戴着一副墨镜,极简主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00362_004_一只猫坐在椅子上,戴着一副墨镜,极简主义风格.jpg) | ![00362_002_日落时的城市天际线,极简主义风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00362_002_日落时的城市天际线,极简主义风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,极简主义风格 | 日落时的城市天际线,极简主义风格 | - -### 水井惠郎风格 - -| ![00364_000_一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00364_000_一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格.jpg) | ![00364_000_日落时的城市天际线,水井惠郎风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00364_000_日落时的城市天际线,水井惠郎风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,水井惠郎风格 | 日落时的城市天际线,水井惠郎风格 | - -### 照片写实风格 - -| ![00423_000_一只猫坐在椅子上,戴着一副墨镜,照片写实风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00423_000_一只猫坐在椅子上,戴着一副墨镜,照片写实风格.jpg) | ![00423_002_日落时的城市天际线,照片写实风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00423_002_日落时的城市天际线,照片写实风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,照片写实风格 | 日落时的城市天际线,照片写实风格 | - - -### 像素可爱风格 - -| ![00428_005_一只猫坐在椅子上,戴着一副墨镜,像素可爱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00428_005_一只猫坐在椅子上,戴着一副墨镜,像素可爱风格.jpg) | ![00428_005_日落时的城市天际线,像素可爱风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00428_005_日落时的城市天际线,像素可爱风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,像素可爱风格 | 日落时的城市天际线,像素可爱风格 | - - - -### 雨天风格 - -| ![00067_002_一只猫坐在椅子上,戴着一副墨镜,雨天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00067_002_一只猫坐在椅子上,戴着一副墨镜,雨天风格.jpg) | ![00050_003_日落时的城市天际线,雨天风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00050_003_日落时的城市天际线,雨天风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 日落时的城市天际线,雨天风格 | 一只猫坐在椅子上,戴着一副墨镜,雨天风格 | - -### 湿漉漉的风格 - -| ![00523_005_一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00523_005_一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格.jpg) | ![00523_001_日落时的城市天际线,湿漉漉的风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00523_001_日落时的城市天际线,湿漉漉的风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,湿漉漉的风格 | 日落时的城市天际线,湿漉漉的风格 | - - -### 维京人风格 - -| ![00577_004_一只猫坐在椅子上,戴着一副墨镜,维京人风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00577_004_一只猫坐在椅子上,戴着一副墨镜,维京人风格.jpg) | 
![00577_005_日落时的城市天际线,维京人风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00577_005_日落时的城市天际线,维京人风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,维京人风格 | 日落时的城市天际线,维京人风格 | - -### 后印象主义 - - -| ![一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义.jpg) | ![日落时的城市天际线, 风格:后印象主义-v2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/日落时的城市天际线,风格:后印象主义-v2.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,风格:后印象主义 | 日落时的城市天际线, 风格:后印象主义-v2 | - -### 素人主义 - - -| ![一只猫坐在椅子上,戴着一副墨镜,风格:素人主义](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/一只猫坐在椅子上,戴着一副墨镜,风格:素人主义.jpg) | ![日落时的城市天际线,风格:素人艺术](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style/日落时的城市天际线,风格:素人艺术.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,风格:素人主义 | 日落时的城市天际线, 风格:素人艺术 | - - - -### 碎核风格 - - -| ![00064_000_一只猫坐在椅子上,戴着一副墨镜,碎核风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00064_000_一只猫坐在椅子上,戴着一副墨镜,碎核风格.jpg) | ![00064_002_日落时的城市天际线,碎核风格](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/art-style-1024/00064_002_日落时的城市天际线,碎核风格.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只猫坐在椅子上,戴着一副墨镜,碎核风格 | 日落时的城市天际线,碎核风格 | - - - - - - - -## Prompt 更多信息 - -### 概念组合 - -![赛博朋克中国山水园林](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/赛博朋克中国山水园林.jpg) - -## ShowCase - -更多 ShowCase 和创意 Prompt,可以参考我的[社交账号](#关注我) 或者是 http://youpromptme.cn/#/gallery/ (建设中) - -### 故障艺术 - -| ![076_时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术_004-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/076_时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术_004-1.jpg) | ![024_巨大的纯白色城堡-油画,故障艺术_005-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/024_巨大的纯白色城堡-油画,故障艺术_005-1.jpg) | ![065_Yggdrasil,世界树和地球融合在一起,故障艺术_009](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/065_Yggdrasil,世界树和地球融合在一起,故障艺术_009.jpg) | ![106_在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术_005](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/106_在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术_005.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| _时钟故障,时间故障,概念艺术,艺术站总部,pixiv趋势,cgsociety,蒸汽波艺术 | 巨大的纯白色城堡-油画,故障艺术 | Yggdrasil,世界树和地球融合在一起,故障艺术 | 在百货公司和工厂的高商业需求中,未来复古科幻幻想对象或设备的专业概念艺术,故障艺术 | - - - -### 蒸汽波艺术 - -| ![185_荒岛,蒸汽波艺术_000-1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/185_荒岛,蒸汽波艺术_000-1.jpg) | 
![060_Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术_007](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/060_Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术_007.jpg) | ![戴着眼镜的猫,蒸汽波艺术, vaporwave art 02](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/戴着眼镜的猫,蒸汽波艺术,vaporwaveart02.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| 荒岛,蒸汽波艺术 | Christoph-Vacher和Kevin-sloan创作的广阔幻想景观,蒸汽波艺术 | 戴着眼镜的猫,蒸汽波艺术 | - - -### 包豪斯艺术 - -| ![007_一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯_002](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/007_一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯_002.jpg) | ![033_梵高猫头鹰,包豪斯_000](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/033_梵高猫头鹰,包豪斯_000.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 一只海鸥和史蒂文·西格正在进行一场凝视比赛,绘画,包豪斯 | 梵高猫头鹰,包豪斯 | - - - - - -### 概念艺术 - -| ![079_4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,故障艺术_004](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/079_4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,故障艺术_004.jpg) | ![043_4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,故障艺术_003](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/043_4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,故障艺术_003.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 4k专业HDR-DnD幻想概念艺术一条由闪电制成的令人敬畏的龙,概念艺术 | 4k专业HDR-DnD奇幻概念艺术小鸡施展幻觉咒语,概念艺术 | - - - -### 像素艺术 - -| ![pixel1](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel1.jpg) | ![pixel2](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel2.jpg) | ![pixel3](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel3.jpg) | ![pixel4](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/pixel4.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | - - - -### 艺术家 - -| ![001_萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格_006](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/001_萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格_006.jpg) | ![033_梵高猫头鹰,蒸汽波艺术_001](https://raw.githubusercontent.com/OleNet/YouPromptMe/gh-pages/you-prompt-me/images/033_梵高猫头鹰,蒸汽波艺术_001.jpg) | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| 萨尔瓦多·达利描绘古代文明的超现实主义梦幻油画,写实风格 | 梵高猫头鹰,蒸汽波艺术 | - - - - -## 附录 - -### 常见的艺术家和艺术风格整理 - -| 艺术类型 | 艺术家 | 常用艺术风格 | -| ---------- | ---------------------- | ---------------------- | -| 肖像画 | 文森特·梵高 | 印象主义 | -| 风景画 | 尼古拉斯·罗伊里奇 | 现实主义 | -| 风俗画 | 皮埃尔-奥古斯特·雷诺阿 | 浪漫主义 | -| 宗教绘画 | 克劳德·莫内 | 表现主义 | -| 抽象画 | 彼得·孔查洛夫斯基 | 后印象主义 | -| 都市风景画 | 卡米尔·毕沙罗 | 象征主义 | -| 素描与草图 | 约翰·辛格·萨金特 | 新艺术主义 | -| 静物 | 伦勃朗 | 巴洛克风格 | -| 裸体画 | 马克·夏加尔 | 抽象表现主义 | -| 插画 | 巴勃罗·毕加索 | 北欧文艺复兴 | -| | 古斯塔夫·多雷 | 素人艺术,原始主义 | -| | 阿尔布雷特·丢勒 | 立体主义 | -| | 鲍里斯·库斯妥基耶夫 | 洛可可 | -| | 埃德加·德加 | 色域绘画 | -| | | 波普艺术 | -| | | 文艺复兴开端 | -| | | 文艺复兴全盛期 | -| | | 极简主义 | -| | | 矫饰主义,文艺复兴晚期 | - - - -### 常见的摄影风格词整理 - -| 可以加入到 Prompt 中的摄影词 | | -| ---------------------------- | 
------------ | -| 浅景深 | 仰拍 | -| 负像 | 动态模糊 | -| 微距 | 高反差 | -| 双色版 | 中心构图 | -| 角度 | 逆光 | -| 三分法 | 长曝光 | -| 抓拍 | 禅宗摄影 | -| 软焦点 | 抽象微距镜头 | -| 黑白 | 暗色调 | -| 无镜反射 | 长时间曝光 | -| 双色调 | 框架,取景 | -| 颗粒图像 | | +利用AI技术生成图片的最终目的是要便捷地为人类创造美的作品,激发人的想象力和创作力。而技术在发展中,做不到十全十美,不能保证每次生成的图片都能够尽善尽美。因此呼吁所有用户,您想分享满意的AI图片时,请以正能量进行传播宣传! +算法生成的图片难免会受到数据的影响,从而导致生成的图片是有数据偏见的。因此在分享AI生成图片到社交媒体之前,请谨慎评估当前的图片是不是含有:令人不适的、暴力的、色情的内容。如对以上的内容进行恶意传播,您将会承担相应的法律后果。 From 2e373966a7fd3119c205350fb14d0b7bfe74185d Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Mon, 24 Oct 2022 15:15:04 +0800 Subject: [PATCH 115/117] add swinir_l_real_sr_x4 (#2076) * git add swinir_l_real_sr_x4 * fix typo * fix typo Co-authored-by: chenjian --- .../swinir_l_real_sr_x4/README.md | 163 ++++ .../swinir_l_real_sr_x4/module.py | 129 +++ .../swinir_l_real_sr_x4/swinir.py | 903 ++++++++++++++++++ .../swinir_l_real_sr_x4/test.py | 58 ++ 4 files changed, 1253 insertions(+) create mode 100644 modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/README.md create mode 100644 modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/module.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/swinir.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/test.py diff --git a/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/README.md b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/README.md new file mode 100644 index 000000000..4862177fe --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/README.md @@ -0,0 +1,163 @@ +# swinir_l_real_sr_x4 + +|模型名称|swinir_l_real_sr_x4| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|SwinIR| +|数据集|DIV2K / Flickr2K| +|是否支持Fine-tuning|否| +|模型大小|142.2MB| +|指标|-| +|最新更新日期|2022-10-10| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +

+
+

+ + - 样例结果示例: +

+ + +

+ +- ### 模型介绍 + + - SwinIR 是一个基于 Swin Transformer 的图像恢复模型。swinir_l_real_sr_x4 是基于 SwinIR-L 的 4 倍现实图像超分辨率模型。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install swinir_l_real_sr_x4 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run swinir_l_real_sr_x4 \ + --input_path "/PATH/TO/IMAGE" \ + --output_dir "swinir_l_real_sr_x4_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="swinir_l_real_sr_x4") + result = module.real_sr( + image=cv2.imread('/PATH/TO/IMAGE'), + visualization=True, + output_dir='swinir_l_real_sr_x4_output' + ) + ``` + + - ### 3、API + + ```python + def real_sr( + image: Union[str, numpy.ndarray], + visualization: bool = True, + output_dir: str = "swinir_l_real_sr_x4_output" + ) -> numpy.ndarray + ``` + + - 超分辨率 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (numpy.ndarray): 图像超分辨率结果 (BGR); + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个图像超分辨率的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m swinir_l_real_sr_x4 + ``` + + - 这样就完成了一个图像超分辨率服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im) + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/swinir_l_real_sr_x4" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = base64_to_cv2(results) + + # 保存结果 + cv2.imwrite('output.jpg', results) + ``` + +## 五、参考资料 + +* 论文:[SwinIR: Image Restoration Using Swin Transformer](https://arxiv.org/abs/2108.10257) + +* 官方实现:[JingyunLiang/SwinIR](https://github.com/JingyunLiang/SwinIR) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install swinir_l_real_sr_x4==1.0.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/module.py b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/module.py new file mode 100644 index 000000000..fecf3c841 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/module.py @@ -0,0 +1,129 @@ +import argparse +import base64 +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn + +from .swinir import SwinIR +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = 
base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name='swinir_l_real_sr_x4', + version='1.0.0', + type="CV/image_editing", + author="", + author_email="", + summary="Image Restoration (Real image Super Resolution) Using Swin Transformer.", +) +class SwinIRMRealSR(nn.Layer): + + def __init__(self): + super(SwinIRMRealSR, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, + '003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pdparams') + self.swinir = SwinIR(upscale=4, + in_chans=3, + img_size=64, + window_size=8, + img_range=1., + depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], + embed_dim=240, + num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8], + mlp_ratio=2, + upsampler='nearest+conv', + resi_connection='3conv') + state_dict = paddle.load(self.default_pretrained_model_path) + self.swinir.set_state_dict(state_dict) + self.swinir.eval() + + def preprocess(self, img: np.ndarray) -> np.ndarray: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.transpose((2, 0, 1)) + img = img / 255.0 + return img.astype(np.float32) + + def postprocess(self, img: np.ndarray) -> np.ndarray: + img = img.clip(0, 1) + img = img * 255.0 + img = img.transpose((1, 2, 0)) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + return img.astype(np.uint8) + + def real_sr(self, + image: Union[str, np.ndarray], + visualization: bool = True, + output_dir: str = "swinir_l_real_sr_x4_output") -> np.ndarray: + if isinstance(image, str): + _, file_name = os.path.split(image) + save_name, _ = os.path.splitext(file_name) + save_name = save_name + '_' + str(int(time.time())) + '.jpg' + image = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_COLOR) + elif isinstance(image, np.ndarray): + save_name = str(int(time.time())) + '.jpg' + image = image + else: + raise Exception("image should be a str / np.ndarray") + + with paddle.no_grad(): + img_input = self.preprocess(image) + img_input = paddle.to_tensor(img_input[None, ...], dtype=paddle.float32) + + img_output = self.swinir(img_input) + img_output = img_output.numpy()[0] + img_output = self.postprocess(img_output) + + if visualization: + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + save_path = os.path.join(output_dir, save_name) + cv2.imwrite(save_path, img_output) + + return img_output + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="Path to image.") + self.parser.add_argument('--output_dir', + type=str, + default='swinir_l_real_sr_x4_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.real_sr(image=args.input_path, visualization=True, output_dir=args.output_dir) + return 'Results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. 
+ """ + image = base64_to_cv2(image) + img_output = self.real_sr(image=image, **kwargs) + + return cv2_to_base64(img_output) diff --git a/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/swinir.py b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/swinir.py new file mode 100644 index 000000000..f4c490a4e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/swinir.py @@ -0,0 +1,903 @@ +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def to_2tuple(x): + if isinstance(x, int): + return (x, x) + else: + return tuple(x) + + +class Mlp(nn.Layer): + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.reshape((B, H // window_size, window_size, W // window_size, window_size, C)) + windows = x.transpose((0, 1, 3, 2, 4, 5)).reshape((-1, window_size, window_size, C)) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.reshape((B, H // window_size, W // window_size, window_size, window_size, -1)) + x = x.transpose((0, 1, 3, 2, 4, 5)).reshape((B, H, W, -1)) + return x + + +class WindowAttention(nn.Layer): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = self.create_parameter(shape=((2 * window_size[0] - 1) * + (2 * window_size[1] - 1), num_heads), + default_initializer=nn.initializer.Constant(0.0)) + + # get pair-wise relative position index for each token inside the window + coords_h = paddle.arange(self.window_size[0]) + coords_w = paddle.arange(self.window_size[1]) + coords = paddle.stack(paddle.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = paddle.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.transpose((1, 2, 0)) # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + self.softmax = nn.Softmax(axis=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape((B_, N, 3, self.num_heads, C // self.num_heads)).transpose((2, 0, 3, 1, 4)) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose((0, 1, 3, 2))) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.reshape( + (-1, ))].reshape((self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], + -1)) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.transpose((2, 0, 1)) # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.reshape((B_ // nW, nW, self.num_heads, N, N)) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.reshape((-1, self.num_heads, N, N)) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B_, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Layer): + r""" Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. 
+ num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention(dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = paddle.zeros((1, H, W, 1)) # 1 H W 1 + + h_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.reshape((-1, self.window_size * self.window_size)) + + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + _h = paddle.full_like(attn_mask, -100.0, dtype='float32') + _z = paddle.full_like(attn_mask, 0.0, dtype='float32') + attn_mask = paddle.where(attn_mask != 0, _h, _z) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.reshape((B, H, W, C)) + + # cyclic shift + if self.shift_size > 0: + shifted_x = paddle.roll(x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.reshape((-1, self.window_size * self.window_size, C)) # nW*B, 
window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size)) + + # merge windows + attn_windows = attn_windows.reshape((-1, self.window_size, self.window_size, C)) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = paddle.roll(shifted_x, shifts=(self.shift_size, self.shift_size), axis=(1, 2)) + else: + x = shifted_x + x = x.reshape((B, H * W, C)) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Layer): + r""" Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.reshape((B, H, W, C)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = paddle.concat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.reshape((B, -1, 4 * C)) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Layer): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.LayerList([ + SwinTransformerBlock(dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class RSTB(nn.Layer): + """Residual Swin Transformer Block (RSTB). + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer(dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2D(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential(nn.Conv2D(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim // 4, 1, 1, 0), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchEmbed(nn.Layer): + r""" Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose((0, 2, 1)) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + flops = 0 + H, W = self.img_size + if self.norm is not None: + flops += H * W * self.embed_dim + return flops + + +class PatchUnEmbed(nn.Layer): + r""" Image to Patch Unembedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. 
Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose((0, 2, 1)).reshape((B, self.embed_dim, x_size[0], x_size[1])) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2D(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2D(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' + 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2D(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class SwinIR(nn.Layer): + r""" SwinIR + A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. 
or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__(self, + img_size=64, + patch_size=1, + in_chans=3, + embed_dim=96, + depths=[6, 6, 6, 6], + num_heads=[6, 6, 6, 6], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + upscale=2, + img_range=1., + upsampler='', + resi_connection='1conv', + **kwargs): + super(SwinIR, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = paddle.to_tensor(rgb_mean).reshape((1, 3, 1, 1)) + else: + self.mean = paddle.zeros((1, 1, 1, 1)) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2D(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + # self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + self.absolute_pos_embed = self.create_parameter(shape=(1, num_patches, embed_dim), + default_initializer=nn.initializer.Constant(0.0)) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in paddle.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.LayerList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + 
self.conv_after_body = nn.Conv2D(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv_after_body = nn.Sequential(nn.Conv2D(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim, 3, 1, 1)) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.conv_up1 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2D(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + if isinstance(m, nn.Linear) and m.bias is not None: + nn.initializer.Constant(0.0)(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.initializer.Constant(0.0)(m.bias) + nn.initializer.Constant(1.0)(m.weight) + + def check_image_size(self, x): + _, _, h, w = x.shape + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.cast(x.dtype) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu(self.conv_up1(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + if self.upscale == 4: + x = self.lrelu(self.conv_up2(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = 
self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, :H * self.upscale, :W * self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() + return flops diff --git a/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/test.py b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/test.py new file mode 100644 index 000000000..8ee583bfd --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_l_real_sr_x4/test.py @@ -0,0 +1,58 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + img = cv2.imread('tests/test.jpg') + img = cv2.resize(img, (0, 0), fx=0.25, fy=0.25) + cv2.imwrite('tests/test.jpg', img) + cls.module = hub.Module(name="swinir_l_real_sr_x4") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('swinir_l_real_sr_x4_output') + + def test_real_sr1(self): + results = self.module.real_sr(image='tests/test.jpg', visualization=False) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr2(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr3(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr4(self): + self.assertRaises(Exception, self.module.real_sr, image=['tests/test.jpg']) + + def test_real_sr5(self): + self.assertRaises(FileNotFoundError, self.module.real_sr, image='no.jpg') + + +if __name__ == "__main__": + unittest.main() From ff03236bdc7e0c4e23e1c189249b638d256017ee Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Mon, 24 Oct 2022 15:15:21 +0800 Subject: [PATCH 116/117] add swinir_m_real_sr_x4 (#2075) * add swinir_m_real_sr_x4 * fix typo * fix typo --- .../swinir_m_real_sr_x4/README.md | 163 ++++ .../swinir_m_real_sr_x4/module.py | 129 +++ .../swinir_m_real_sr_x4/swinir.py | 903 ++++++++++++++++++ .../swinir_m_real_sr_x4/test.py | 58 ++ 4 files changed, 1253 insertions(+) create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/README.md create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/module.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/swinir.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/test.py diff --git 
a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/README.md b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/README.md new file mode 100644 index 000000000..66a8e5fd7 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/README.md @@ -0,0 +1,163 @@ +# swinir_m_real_sr_x4 + +|模型名称|swinir_m_real_sr_x4| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|SwinIR| +|数据集|DIV2K / Flickr2K| +|是否支持Fine-tuning|否| +|模型大小|66.8MB| +|指标|-| +|最新更新日期|2022-10-10| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +

+ + - 样例结果示例: +

+ +- ### 模型介绍 + + - SwinIR 是一个基于 Swin Transformer 的图像恢复模型。swinir_m_real_sr_x4 是基于 SwinIR-M 的 4 倍现实图像超分辨率模型。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install swinir_m_real_sr_x4 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run swinir_m_real_sr_x4 \ + --input_path "/PATH/TO/IMAGE" \ + --output_dir "swinir_m_real_sr_x4_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="swinir_m_real_sr_x4") + result = module.real_sr( + image=cv2.imread('/PATH/TO/IMAGE'), + visualization=True, + output_dir='swinir_m_real_sr_x4_output' + ) + ``` + + - ### 3、API + + ```python + def real_sr( + image: Union[str, numpy.ndarray], + visualization: bool = True, + output_dir: str = "swinir_m_real_sr_x4_output" + ) -> numpy.ndarray + ``` + + - 超分辨率 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (numpy.ndarray): 图像超分辨率结果 (BGR); + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个图像超分辨率的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m swinir_m_real_sr_x4 + ``` + + - 这样就完成了一个图像超分辨率服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im) + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/swinir_m_real_sr_x4" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = base64_to_cv2(results) + + # 保存结果 + cv2.imwrite('output.jpg', results) + ``` + +## 五、参考资料 + +* 论文:[SwinIR: Image Restoration Using Swin Transformer](https://arxiv.org/abs/2108.10257) + +* 官方实现:[JingyunLiang/SwinIR](https://github.com/JingyunLiang/SwinIR) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install swinir_m_real_sr_x4==1.0.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/module.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/module.py new file mode 100644 index 000000000..8c40ab719 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/module.py @@ -0,0 +1,129 @@ +import argparse +import base64 +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn + +from .swinir import SwinIR +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = 
base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name='swinir_m_real_sr_x4', + version='1.0.0', + type="CV/image_editing", + author="", + author_email="", + summary="Image Restoration (Real image Super Resolution) Using Swin Transformer.", +) +class SwinIRMRealSR(nn.Layer): + + def __init__(self): + super(SwinIRMRealSR, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, + '003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x4_GAN.pdparams') + self.swinir = SwinIR(upscale=4, + in_chans=3, + img_size=64, + window_size=8, + img_range=1., + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='nearest+conv', + resi_connection='1conv') + state_dict = paddle.load(self.default_pretrained_model_path) + self.swinir.set_state_dict(state_dict) + self.swinir.eval() + + def preprocess(self, img: np.ndarray) -> np.ndarray: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.transpose((2, 0, 1)) + img = img / 255.0 + return img.astype(np.float32) + + def postprocess(self, img: np.ndarray) -> np.ndarray: + img = img.clip(0, 1) + img = img * 255.0 + img = img.transpose((1, 2, 0)) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + return img.astype(np.uint8) + + def real_sr(self, + image: Union[str, np.ndarray], + visualization: bool = True, + output_dir: str = "swinir_m_real_sr_x4_output") -> np.ndarray: + if isinstance(image, str): + _, file_name = os.path.split(image) + save_name, _ = os.path.splitext(file_name) + save_name = save_name + '_' + str(int(time.time())) + '.jpg' + image = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_COLOR) + elif isinstance(image, np.ndarray): + save_name = str(int(time.time())) + '.jpg' + image = image + else: + raise Exception("image should be a str / np.ndarray") + + with paddle.no_grad(): + img_input = self.preprocess(image) + img_input = paddle.to_tensor(img_input[None, ...], dtype=paddle.float32) + + img_output = self.swinir(img_input) + img_output = img_output.numpy()[0] + img_output = self.postprocess(img_output) + + if visualization: + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + save_path = os.path.join(output_dir, save_name) + cv2.imwrite(save_path, img_output) + + return img_output + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="Path to image.") + self.parser.add_argument('--output_dir', + type=str, + default='swinir_m_real_sr_x4_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.real_sr(image=args.input_path, visualization=True, output_dir=args.output_dir) + return 'Results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. 
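+ The image argument is a base64-encoded image string (decoded with base64_to_cv2 above); the super-resolved result is returned base64-encoded via cv2_to_base64. 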
+ """ + image = base64_to_cv2(image) + img_output = self.real_sr(image=image, **kwargs) + + return cv2_to_base64(img_output) diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/swinir.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/swinir.py new file mode 100644 index 000000000..f4c490a4e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/swinir.py @@ -0,0 +1,903 @@ +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def to_2tuple(x): + if isinstance(x, int): + return (x, x) + else: + return tuple(x) + + +class Mlp(nn.Layer): + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.reshape((B, H // window_size, window_size, W // window_size, window_size, C)) + windows = x.transpose((0, 1, 3, 2, 4, 5)).reshape((-1, window_size, window_size, C)) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.reshape((B, H // window_size, W // window_size, window_size, window_size, -1)) + x = x.transpose((0, 1, 3, 2, 4, 5)).reshape((B, H, W, -1)) + return x + + +class WindowAttention(nn.Layer): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = self.create_parameter(shape=((2 * window_size[0] - 1) * + (2 * window_size[1] - 1), num_heads), + default_initializer=nn.initializer.Constant(0.0)) + + # get pair-wise relative position index for each token inside the window + coords_h = paddle.arange(self.window_size[0]) + coords_w = paddle.arange(self.window_size[1]) + coords = paddle.stack(paddle.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = paddle.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.transpose((1, 2, 0)) # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + self.softmax = nn.Softmax(axis=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape((B_, N, 3, self.num_heads, C // self.num_heads)).transpose((2, 0, 3, 1, 4)) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose((0, 1, 3, 2))) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.reshape( + (-1, ))].reshape((self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], + -1)) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.transpose((2, 0, 1)) # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.reshape((B_ // nW, nW, self.num_heads, N, N)) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.reshape((-1, self.num_heads, N, N)) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B_, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Layer): + r""" Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. 
+ num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention(dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = paddle.zeros((1, H, W, 1)) # 1 H W 1 + + h_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.reshape((-1, self.window_size * self.window_size)) + + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + _h = paddle.full_like(attn_mask, -100.0, dtype='float32') + _z = paddle.full_like(attn_mask, 0.0, dtype='float32') + attn_mask = paddle.where(attn_mask != 0, _h, _z) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.reshape((B, H, W, C)) + + # cyclic shift + if self.shift_size > 0: + shifted_x = paddle.roll(x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.reshape((-1, self.window_size * self.window_size, C)) # nW*B, 
window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size)) + + # merge windows + attn_windows = attn_windows.reshape((-1, self.window_size, self.window_size, C)) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = paddle.roll(shifted_x, shifts=(self.shift_size, self.shift_size), axis=(1, 2)) + else: + x = shifted_x + x = x.reshape((B, H * W, C)) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Layer): + r""" Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.reshape((B, H, W, C)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = paddle.concat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.reshape((B, -1, 4 * C)) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Layer): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.LayerList([ + SwinTransformerBlock(dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class RSTB(nn.Layer): + """Residual Swin Transformer Block (RSTB). + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer(dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2D(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential(nn.Conv2D(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim // 4, 1, 1, 0), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchEmbed(nn.Layer): + r""" Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose((0, 2, 1)) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + flops = 0 + H, W = self.img_size + if self.norm is not None: + flops += H * W * self.embed_dim + return flops + + +class PatchUnEmbed(nn.Layer): + r""" Image to Patch Unembedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. 
Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose((0, 2, 1)).reshape((B, self.embed_dim, x_size[0], x_size[1])) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2D(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2D(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' + 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2D(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class SwinIR(nn.Layer): + r""" SwinIR + A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. 
or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__(self, + img_size=64, + patch_size=1, + in_chans=3, + embed_dim=96, + depths=[6, 6, 6, 6], + num_heads=[6, 6, 6, 6], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + upscale=2, + img_range=1., + upsampler='', + resi_connection='1conv', + **kwargs): + super(SwinIR, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = paddle.to_tensor(rgb_mean).reshape((1, 3, 1, 1)) + else: + self.mean = paddle.zeros((1, 1, 1, 1)) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2D(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + # self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + self.absolute_pos_embed = self.create_parameter(shape=(1, num_patches, embed_dim), + default_initializer=nn.initializer.Constant(0.0)) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in paddle.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.LayerList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + 
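# conv_after_body maps the deep features back before the residual add in forward(): '1conv' is a single 3x3 conv, + # while the '3conv' branch below bottlenecks to embed_dim // 4 to save parameters and memory. +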
self.conv_after_body = nn.Conv2D(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv_after_body = nn.Sequential(nn.Conv2D(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim, 3, 1, 1)) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.conv_up1 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2D(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + if isinstance(m, nn.Linear) and m.bias is not None: + nn.initializer.Constant(0.0)(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.initializer.Constant(0.0)(m.bias) + nn.initializer.Constant(1.0)(m.weight) + + def check_image_size(self, x): + _, _, h, w = x.shape + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.cast(x.dtype) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu(self.conv_up1(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + if self.upscale == 4: + x = self.lrelu(self.conv_up2(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = 
self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, :H * self.upscale, :W * self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() + return flops diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/test.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/test.py new file mode 100644 index 000000000..0b9179c50 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x4/test.py @@ -0,0 +1,58 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + img = cv2.imread('tests/test.jpg') + img = cv2.resize(img, (0, 0), fx=0.25, fy=0.25) + cv2.imwrite('tests/test.jpg', img) + cls.module = hub.Module(name="swinir_m_real_sr_x4") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('swinir_m_real_sr_x4_output') + + def test_real_sr1(self): + results = self.module.real_sr(image='tests/test.jpg', visualization=False) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr2(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr3(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr4(self): + self.assertRaises(Exception, self.module.real_sr, image=['tests/test.jpg']) + + def test_real_sr5(self): + self.assertRaises(FileNotFoundError, self.module.real_sr, image='no.jpg') + + +if __name__ == "__main__": + unittest.main() From 57d977303b4f6002eb8cc40ccb774146921c984a Mon Sep 17 00:00:00 2001 From: jm12138 <2286040843@qq.com> Date: Mon, 24 Oct 2022 15:15:39 +0800 Subject: [PATCH 117/117] Add swinir_m_real_sr_x2 Module (#2074) * add swinir_m_real_sr_x2 * update README * fix typo * fix typo --- .../swinir_m_real_sr_x2/README.md | 163 ++++ .../swinir_m_real_sr_x2/module.py | 129 +++ .../swinir_m_real_sr_x2/swinir.py | 903 ++++++++++++++++++ .../swinir_m_real_sr_x2/test.py | 58 ++ 4 files changed, 1253 insertions(+) create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/README.md create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/module.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/swinir.py create mode 100644 modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/test.py diff --git 
a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/README.md b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/README.md new file mode 100644 index 000000000..b79ccd860 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/README.md @@ -0,0 +1,163 @@ +# swinir_m_real_sr_x2 + +|模型名称|swinir_m_real_sr_x2| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|SwinIR| +|数据集|DIV2K / Flickr2K| +|是否支持Fine-tuning|否| +|模型大小|66.8MB| +|指标|-| +|最新更新日期|2022-10-10| + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 网络结构: +

+ + - 样例结果示例: +

+ +- ### 模型介绍 + + - SwinIR 是一个基于 Swin Transformer 的图像恢复模型。swinir_m_real_sr_x2 是基于 SwinIR-M 的 2 倍现实图像超分辨率模型。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2.安装 + + - ```shell + $ hub install swinir_m_real_sr_x2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + - ### 1、命令行预测 + + ```shell + $ hub run swinir_m_real_sr_x2 \ + --input_path "/PATH/TO/IMAGE" \ + --output_dir "swinir_m_real_sr_x2_output" + ``` + + - ### 2、预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + module = hub.Module(name="swinir_m_real_sr_x2") + result = module.real_sr( + image=cv2.imread('/PATH/TO/IMAGE'), + visualization=True, + output_dir='swinir_m_real_sr_x2_output' + ) + ``` + + - ### 3、API + + ```python + def real_sr( + image: Union[str, numpy.ndarray], + visualization: bool = True, + output_dir: str = "swinir_m_real_sr_x2_output" + ) -> numpy.ndarray + ``` + + - 超分辨率 API + + - **参数** + + * image (Union\[str, numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 保存处理结果的文件目录。 + + - **返回** + + * res (numpy.ndarray): 图像超分辨率结果 (BGR); + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个图像超分辨率的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + ```shell + $ hub serving start -m swinir_m_real_sr_x2 + ``` + + - 这样就完成了一个图像超分辨率服务化API的部署,默认端口号为8866。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = { + 'image': cv2_to_base64(org_im) + } + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/swinir_m_real_sr_x2" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 结果转换 + results = r.json()['results'] + results = base64_to_cv2(results) + + # 保存结果 + cv2.imwrite('output.jpg', results) + ``` + +## 五、参考资料 + +* 论文:[SwinIR: Image Restoration Using Swin Transformer](https://arxiv.org/abs/2108.10257) + +* 官方实现:[JingyunLiang/SwinIR](https://github.com/JingyunLiang/SwinIR) + +## 六、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install swinir_m_real_sr_x2==1.0.0 + ``` diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/module.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/module.py new file mode 100644 index 000000000..7e2fd80f1 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/module.py @@ -0,0 +1,129 @@ +import argparse +import base64 +import os +import time +from typing import Union + +import cv2 +import numpy as np +import paddle +import paddle.nn as nn + +from .swinir import SwinIR +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tobytes()).decode('utf8') + + +def base64_to_cv2(b64str): + data = 
base64.b64decode(b64str.encode('utf8')) + data = np.frombuffer(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + +@moduleinfo( + name='swinir_m_real_sr_x2', + version='1.0.0', + type="CV/image_editing", + author="", + author_email="", + summary="Image Restoration (Real image Super Resolution) Using Swin Transformer.", +) +class SwinIRMRealSR(nn.Layer): + + def __init__(self): + super(SwinIRMRealSR, self).__init__() + self.default_pretrained_model_path = os.path.join(self.directory, + '003_realSR_BSRGAN_DFO_s64w8_SwinIR-M_x2_GAN.pdparams') + self.swinir = SwinIR(upscale=2, + in_chans=3, + img_size=64, + window_size=8, + img_range=1., + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='nearest+conv', + resi_connection='1conv') + state_dict = paddle.load(self.default_pretrained_model_path) + self.swinir.set_state_dict(state_dict) + self.swinir.eval() + + def preprocess(self, img: np.ndarray) -> np.ndarray: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = img.transpose((2, 0, 1)) + img = img / 255.0 + return img.astype(np.float32) + + def postprocess(self, img: np.ndarray) -> np.ndarray: + img = img.clip(0, 1) + img = img * 255.0 + img = img.transpose((1, 2, 0)) + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + return img.astype(np.uint8) + + def real_sr(self, + image: Union[str, np.ndarray], + visualization: bool = True, + output_dir: str = "swinir_m_real_sr_x2_output") -> np.ndarray: + if isinstance(image, str): + _, file_name = os.path.split(image) + save_name, _ = os.path.splitext(file_name) + save_name = save_name + '_' + str(int(time.time())) + '.jpg' + image = cv2.imdecode(np.fromfile(image, dtype=np.uint8), cv2.IMREAD_COLOR) + elif isinstance(image, np.ndarray): + save_name = str(int(time.time())) + '.jpg' + image = image + else: + raise Exception("image should be a str / np.ndarray") + + with paddle.no_grad(): + img_input = self.preprocess(image) + img_input = paddle.to_tensor(img_input[None, ...], dtype=paddle.float32) + + img_output = self.swinir(img_input) + img_output = img_output.numpy()[0] + img_output = self.postprocess(img_output) + + if visualization: + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + save_path = os.path.join(output_dir, save_name) + cv2.imwrite(save_path, img_output) + + return img_output + + @runnable + def run_cmd(self, argvs): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + self.parser.add_argument('--input_path', type=str, help="Path to image.") + self.parser.add_argument('--output_dir', + type=str, + default='swinir_m_real_sr_x2_output', + help="The directory to save output images.") + args = self.parser.parse_args(argvs) + self.real_sr(image=args.input_path, visualization=True, output_dir=args.output_dir) + return 'Results are saved in %s' % args.output_dir + + @serving + def serving_method(self, image, **kwargs): + """ + Run as a service. 
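+ The image argument is a base64-encoded image string (decoded with base64_to_cv2 above); the super-resolved result is returned base64-encoded via cv2_to_base64. 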
+ """ + image = base64_to_cv2(image) + img_output = self.real_sr(image=image, **kwargs) + + return cv2_to_base64(img_output) diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/swinir.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/swinir.py new file mode 100644 index 000000000..f4c490a4e --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/swinir.py @@ -0,0 +1,903 @@ +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def to_2tuple(x): + if isinstance(x, int): + return (x, x) + else: + return tuple(x) + + +class Mlp(nn.Layer): + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.reshape((B, H // window_size, window_size, W // window_size, window_size, C)) + windows = x.transpose((0, 1, 3, 2, 4, 5)).reshape((-1, window_size, window_size, C)) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.reshape((B, H // window_size, W // window_size, window_size, window_size, -1)) + x = x.transpose((0, 1, 3, 2, 4, 5)).reshape((B, H, W, -1)) + return x + + +class WindowAttention(nn.Layer): + r""" Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = self.create_parameter(shape=((2 * window_size[0] - 1) * + (2 * window_size[1] - 1), num_heads), + default_initializer=nn.initializer.Constant(0.0)) + + # get pair-wise relative position index for each token inside the window + coords_h = paddle.arange(self.window_size[0]) + coords_w = paddle.arange(self.window_size[1]) + coords = paddle.stack(paddle.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = paddle.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.transpose((1, 2, 0)) # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + self.softmax = nn.Softmax(axis=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape((B_, N, 3, self.num_heads, C // self.num_heads)).transpose((2, 0, 3, 1, 4)) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose((0, 1, 3, 2))) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.reshape( + (-1, ))].reshape((self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], + -1)) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.transpose((2, 0, 1)) # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.reshape((B_ // nW, nW, self.num_heads, N, N)) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.reshape((-1, self.num_heads, N, N)) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B_, N, C)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' + + def flops(self, N): + # calculate flops for 1 window with token length of N + flops = 0 + # qkv = self.qkv(x) + flops += N * self.dim * 3 * self.dim + # attn = (q @ k.transpose(-2, -1)) + flops += self.num_heads * N * (self.dim // self.num_heads) * N + # x = (attn @ v) + flops += self.num_heads * N * N * (self.dim // self.num_heads) + # x = self.proj(x) + flops += N * self.dim * self.dim + return flops + + +class SwinTransformerBlock(nn.Layer): + r""" Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resulotion. 
+ num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Layer, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention(dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer("attn_mask", attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + H, W = x_size + img_mask = paddle.zeros((1, H, W, 1)) # 1 H W 1 + + h_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), slice(-self.window_size, + -self.shift_size if self.shift_size else None), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.reshape((-1, self.window_size * self.window_size)) + + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + _h = paddle.full_like(attn_mask, -100.0, dtype='float32') + _z = paddle.full_like(attn_mask, 0.0, dtype='float32') + attn_mask = paddle.where(attn_mask != 0, _h, _z) + + return attn_mask + + def forward(self, x, x_size): + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.reshape((B, H, W, C)) + + # cyclic shift + if self.shift_size > 0: + shifted_x = paddle.roll(x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.reshape((-1, self.window_size * self.window_size, C)) # nW*B, 
window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size)) + + # merge windows + attn_windows = attn_windows.reshape((-1, self.window_size, self.window_size, C)) + shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = paddle.roll(shifted_x, shifts=(self.shift_size, self.shift_size), axis=(1, 2)) + else: + x = shifted_x + x = x.reshape((B, H * W, C)) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ + f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" + + def flops(self): + flops = 0 + H, W = self.input_resolution + # norm1 + flops += self.dim * H * W + # W-MSA/SW-MSA + nW = H * W / self.window_size / self.window_size + flops += nW * self.attn.flops(self.window_size * self.window_size) + # mlp + flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio + # norm2 + flops += self.dim * H * W + return flops + + +class PatchMerging(nn.Layer): + r""" Patch Merging Layer. + Args: + input_resolution (tuple[int]): Resolution of input feature. + dim (int): Number of input channels. + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.input_resolution = input_resolution + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x): + """ + x: B, H*W, C + """ + H, W = self.input_resolution + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." + + x = x.reshape((B, H, W, C)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = paddle.concat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.reshape((B, -1, 4 * C)) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + def extra_repr(self) -> str: + return f"input_resolution={self.input_resolution}, dim={self.dim}" + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.dim + flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim + return flops + + +class BasicLayer(nn.Layer): + """ A basic Swin Transformer layer for one stage. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. 
Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.LayerList([ + SwinTransformerBlock(dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + for blk in self.blocks: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" + + def flops(self): + flops = 0 + for blk in self.blocks: + flops += blk.flops() + if self.downsample is not None: + flops += self.downsample.flops() + return flops + + +class RSTB(nn.Layer): + """Residual Swin Transformer Block (RSTB). + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Layer, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Layer | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + img_size: Input image size. + patch_size: Patch size. + resi_connection: The convolutional block before residual connection. 
+ """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer(dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2D(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential(nn.Conv2D(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim // 4, 1, 1, 0), nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x + + def flops(self): + flops = 0 + flops += self.residual_group.flops() + H, W = self.input_resolution + flops += H * W * self.dim * self.dim * 9 + flops += self.patch_embed.flops() + flops += self.patch_unembed.flops() + + return flops + + +class PatchEmbed(nn.Layer): + r""" Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + x = x.flatten(2).transpose((0, 2, 1)) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + def flops(self): + flops = 0 + H, W = self.img_size + if self.norm is not None: + flops += H * W * self.embed_dim + return flops + + +class PatchUnEmbed(nn.Layer): + r""" Image to Patch Unembedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Layer, optional): Normalization layer. 
Default: None + """ + + def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + B, HW, C = x.shape + x = x.transpose((0, 2, 1)).reshape((B, self.embed_dim, x_size[0], x_size[1])) # B Ph*Pw C + return x + + def flops(self): + flops = 0 + return flops + + +class Upsample(nn.Sequential): + """Upsample module. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2D(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2D(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' + 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle) + Used in lightweight SR to save parameters. + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2D(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) + + def flops(self): + H, W = self.input_resolution + flops = H * W * self.num_feat * 3 * 9 + return flops + + +class SwinIR(nn.Layer): + r""" SwinIR + A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer. + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + num_heads (tuple(int)): Number of attention heads in different layers. + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Layer): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False + upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction + img_range: Image range. 1. 
or 255. + upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None + resi_connection: The convolutional block before residual connection. '1conv'/'3conv' + """ + + def __init__(self, + img_size=64, + patch_size=1, + in_chans=3, + embed_dim=96, + depths=[6, 6, 6, 6], + num_heads=[6, 6, 6, 6], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + upscale=2, + img_range=1., + upsampler='', + resi_connection='1conv', + **kwargs): + super(SwinIR, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = paddle.to_tensor(rgb_mean).reshape((1, 3, 1, 1)) + else: + self.mean = paddle.zeros((1, 1, 1, 1)) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + ##################################################################################################### + ################################### 1, shallow feature extraction ################################### + self.conv_first = nn.Conv2D(num_in_ch, embed_dim, 3, 1, 1) + + ##################################################################################################### + ################################### 2, deep feature extraction ###################################### + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed(img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + # self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) + self.absolute_pos_embed = self.create_parameter(shape=(1, num_patches, embed_dim), + default_initializer=nn.initializer.Constant(0.0)) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in paddle.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.LayerList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + 
self.conv_after_body = nn.Conv2D(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv_after_body = nn.Sequential(nn.Conv2D(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2), + nn.Conv2D(embed_dim // 4, embed_dim, 3, 1, 1)) + + ##################################################################################################### + ################################ 3, high quality image reconstruction ################################ + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.upsample = Upsample(upscale, num_feat) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential(nn.Conv2D(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU()) + self.conv_up1 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2D(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2D(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2D(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + if isinstance(m, nn.Linear) and m.bias is not None: + nn.initializer.Constant(0.0)(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.initializer.Constant(0.0)(m.bias) + nn.initializer.Constant(1.0)(m.weight) + + def check_image_size(self, x): + _, _, h, w = x.shape + mod_pad_h = (self.window_size - h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.cast(x.dtype) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu(self.conv_up1(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + if self.upscale == 4: + x = self.lrelu(self.conv_up2(nn.functional.interpolate(x, scale_factor=2, mode='nearest'))) + x = 
self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body(self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, :H * self.upscale, :W * self.upscale] + + def flops(self): + flops = 0 + H, W = self.patches_resolution + flops += H * W * 3 * self.embed_dim * 9 + flops += self.patch_embed.flops() + for i, layer in enumerate(self.layers): + flops += layer.flops() + flops += H * W * 3 * self.embed_dim * self.embed_dim + flops += self.upsample.flops() + return flops diff --git a/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/test.py b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/test.py new file mode 100644 index 000000000..f56226e52 --- /dev/null +++ b/modules/image/Image_editing/super_resolution/swinir_m_real_sr_x2/test.py @@ -0,0 +1,58 @@ +import os +import shutil +import unittest + +import cv2 +import numpy as np +import requests + +import paddlehub as hub + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + + +class TestHubModule(unittest.TestCase): + + @classmethod + def setUpClass(cls) -> None: + img_url = 'https://unsplash.com/photos/mJaD10XeD7w/download?ixid=MnwxMjA3fDB8MXxzZWFyY2h8M3x8Y2F0fGVufDB8fHx8MTY2MzczNDc3Mw&force=true&w=640' + if not os.path.exists('tests'): + os.makedirs('tests') + response = requests.get(img_url) + assert response.status_code == 200, 'Network Error.' + with open('tests/test.jpg', 'wb') as f: + f.write(response.content) + img = cv2.imread('tests/test.jpg') + img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5) + cv2.imwrite('tests/test.jpg', img) + cls.module = hub.Module(name="swinir_m_real_sr_x2") + + @classmethod + def tearDownClass(cls) -> None: + shutil.rmtree('tests') + shutil.rmtree('swinir_m_real_sr_x2_output') + + def test_real_sr1(self): + results = self.module.real_sr(image='tests/test.jpg', visualization=False) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr2(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr3(self): + results = self.module.real_sr(image=cv2.imread('tests/test.jpg'), visualization=True) + + self.assertIsInstance(results, np.ndarray) + + def test_real_sr4(self): + self.assertRaises(Exception, self.module.real_sr, image=['tests/test.jpg']) + + def test_real_sr5(self): + self.assertRaises(FileNotFoundError, self.module.real_sr, image='no.jpg') + + +if __name__ == "__main__": + unittest.main()
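The unit tests above exercise the module through PaddleHub's standard entry point. For reference, a minimal usage sketch along the same lines (the image path and printout are illustrative; `real_sr` returns the upscaled image as a NumPy array, as asserted in the tests):

```python
import cv2
import paddlehub as hub

# Load the packaged module by name and run 2x real-world super-resolution.
module = hub.Module(name="swinir_m_real_sr_x2")

# `image` accepts a file path or a BGR ndarray; `visualization=False` skips
# writing the result to the output directory used by the tests above.
sr = module.real_sr(image=cv2.imread("tests/test.jpg"), visualization=False)
print(sr.shape)  # roughly (2*H, 2*W, 3) for an H x W input
```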

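For readers checking the attention plumbing in swinir.py above, the `window_partition` / `window_reverse` pair is an exact round trip whenever H and W are multiples of the window size. A small sanity-check sketch (shapes follow the docstrings; the concrete sizes are arbitrary and assume swinir.py is importable):

```python
import paddle

from swinir import window_partition, window_reverse

# (B, H, W, C) with H and W divisible by the window size
x = paddle.randn((2, 16, 16, 8))

windows = window_partition(x, 8)        # -> (num_windows*B, 8, 8, C) == (8, 8, 8, 8)
y = window_reverse(windows, 8, 16, 16)  # -> back to (2, 16, 16, 8)

assert list(y.shape) == list(x.shape)
assert bool(paddle.allclose(x, y))      # partition followed by reverse is lossless
```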
zc1Bw5GfEdi8Erma#~0<^fL(R8sg#$OV!Z0{Yc{VkfF1IAjR+2c!@9OW5fmB?v<(=5 zwZH*V?3@)hwFKu$?rpUhnmO8uZ&}`_)@R(_p{t6U1P*?%Rc>d~EAaH+t0tm)le+XF_y;-{( z*y}h{xP8PwMxRULXZWaMl2>vs$D+n5ob?`oGV4Qd*AZZGU=!t{UzOYoQfxWj_rbds z{A>((cLr{QXHWPEF;YsUjpCT|1#poEhm#!WL zY0B0zOm)|w@m}M)d1I#kB8PqRn-D@FSyuF<^q;Jiq;%b zQcNIe&i2HErWk~?HEWvFd!pQFd7$BS&#v4V?x7*#- zaeY`suHf$CHgX(M*s&ibS~dJJkoI?=d@B6>SM-ET40R^3+tf#lj*OIV6mQXEemK>1 z9v&XzfBO&$RPlk8xvT4_*;Q1?c@*SFud$U>!0BIrk+4I=e)SBDhYYaRGT_#5c%PwJ zXQPYSch{|0ksf}&2VEq=r@P{3)iBu1al}RHep5s>0+g>xG2Y{wje|vx`ngJmrFU5z z6BpZu|FQ}z91N{o%ge{g%X*Cbybk_3;>h6O0zO|bRFRcAku3$~hmQhg8{8?=W8PE7 zLy(U*H=V$G-zddaJo}Yt>G9X;cam#S{c2`@3>}_OcrsvXc}CH0DfAIl%O^kroy&A# zygG@jM?+RSpZ|LCQsHwRB-ueF>rsTh7S5;0120j-xzj*S=|AvE=-j;;RAlykSH?^J z2@6b0BQs1t9*cv3ikK9VQ0_fa&~GXYZ}x%GW=_JoK!Mr|1Q|kv|ETUh^u2}8N~clp zZJ^n!4MEagLEsr1l-r2fsciXA3me2}(rp&u>a_E8kiPo||L89@wB+9w=fC>-|NfKZ zEzj{pjZ2n-qQ31DAI+F70?F)CJ_MfY)t3LJaG> zN5`!+58eG0k0%#f3)L1RLf;>eD)FV-R3uR;d0O(@?^E1;s4jVE*qDW)tKJJ_;)322 z2C?*<*mK^#W)ObN1U~m~DK$uLeU7fGIxp3C&|JTu@<5-W;XXxdDfT-_aJm$Dorjh1 ztF~IEust5N|5{|VT8;iD=F^iyN4?df>1Sfo?Izdb@Ae>L!KVY|dl^&A`F}IhRwfTW9_bpYm~t5A!R$}hF2>?ieS(@}qGVRyxr$HY@=cv4 zG!d#&c0CtvOJ{U&)=-FS36f$k5eaE2K#lQHpx>}CMuu81_4P>s!j9M`gC$9}kD^*~ z`?IRwq=0nyj1O7SPcx^u3I88QD9^mttyuO?d|D}=Tzd%Y(Vh+sE-wkxc~;Qwgf9^J zbFJ;uqS-q}juGmCppu^+hBU1?FII9VQXJYf26K)EBp+rwGsq2e9y#M4`DwtYay(T7 z-3WoO>^JGPzbEgr$a~!a0}IhY*y^dR&GlTqCm>MRs^4YQ-NN4OIyXz{%BOpPE%&fJ zWKH2F!uak#a==jO58Nh%^R=UFw%hneRopDM#tD2~yG~AV2L$J*eHI8iJ9UuF$?+b! zQu^w^>h|XOIK!H-jk%;7)@(2uk|T5^b<5+(Ahujj0X2p&KWIZR zzSg*D1c>iqX8_n2*6Rk4Jt?%@E#Szzy@K6q=icRr%;APhHK>v}%;PN5Vx}9ujsQ8P z9C=`P(B`2uwKH7mm-aBI(~SRPoibZ^f)K_MF?U*9D;WJb^-=e&1?cmZ+0VMIiXquY z7PGoA9$mKS;?Fsr042NL1by!TO0&$M1QSKh0t{XE2c}ht!D!q9fi8&hnk-`d41H$mOk;e6w|d=-FtcL$RNgzrMI1+#->gD+s^p z(lThBt)8hKIqcvOc9N4G%zSf20n%tNBQ+ z*^8-skBuF=>10p(F}-cAdmjw!ml~i*!%0z%TqMNxJ$C%Z~u*6HbYAgpl& z)Q}abAEy^>qJ+-0wHxWHn{K>IB5f2A=K$XKPk@`m*5tY@X@odGk3C2|*`xA4lV-9; z#jtm~_lUhA7v*?-m6zEUJc>rKD`MjRJ@78|T^LHuJ_uFkp+Wn*c8FjTFr^x+K>uT< zk_)M>3ZqgPm|U;)9Kn@3K_&m!AiLiKP##tNvI73)%!~J~^qqLj3KfY0;9}cOA^+or z5^MA5YG=ANRVSLt$q^Iq96e1rf{l%cD2!DY0XtvlY)oU7ZW9>>gU30UVT){9JYutw z@w4i%(O+zY5BS&hp5^Hw?yb#9J2C1kOP)$>9Q(@PMd3)h?ywNXAtVEX8YA)tVXPBe1l~0Q%0#Q7;au3m8O+ zjKH8A8n1X?>5W`!%Y_`&(M4{FbzGcu)~An_OX;RDZnFr@T6M@i@%6nKzj2 zr=A0?Zqy6(RLG7tZbFd*uQrb%WBwI&vWp#mU7EpNKMsc-2u6qyw04;C^LM$mcp{M|79 z2R~>hM78vHUnZFHl01@M`152e6<5)4DxT6Bebf>b>fY+ zd^*iL+}|%VHaO>VP1e62J;$x$9eBq@w3F7nTH3lCPJlLFyH((==yPtBBzshesVXdY zm>ft7OB`c*{3qVc$d4WsekNL~^ugoNm*!dNWa$_3tt_QdM$kS9%dxWj-+*5k++xDl zafbOUOk-Cw`KAFYt-4F`SjVBt|FsxR;GMOZ;pE#!`lv{eODsbTJDbk(+s3wzHvSx^ z?5-Sb6yQOmfq-nW|4GKzsUON3zhYDah{IK0nu7yJ$&fm;r?yV1_I`@94Nuu-TcZe` zaUh{Y`cq!X9EOG(Fgg`Dm|Go8@E>ISWz~z~X`-$76QXZLnF09hu#qDfmH_`1o&Cc2 zOEn(WQ0p8)wtIHr5%qAvXCx~~(7o_4nw369%(i&(&!j=tjqY=Y-$3|CMIK573cza1o4aZ2~u zNEG&1b!y4#EOqwf?mmqhY7Jdqs-7|%^2_4*OUv|n{apNe;%6J$!h$sYs>W772XdqMN9DKgQd!wY}Vv zV`Czk6lKC^iNs6d&=_nz9u@F`D3I1^~5$1J?LdYs7PY??gsYv8oc&l2vcxD4=bW~Ya? 
zeC7i0@f4}YY6jnKMAhHOMd}(F{%xZCZ?jYg0~~$NCbe+y3E00AENbwkATi&eJx9t- zYrrE84UJb8f2rY0r^27?RC^fG_jG$&kf$ng$||T&T8lkOkkD0(9ZVQuZJ4=cN@u$J zmQg6N&1K@ns%d2OoZl-=a2RR0{!fvw4rKs~QgClfcTnf>4CO*$ASbw#5>^Zb@5zY3x!>7|TixMB=aO77%8+-|L5&}f_-lrYHm(jv@ zPd{loS}eDUt4?`|Wl{`I@Okl9^7EHyLNwS`wraXnNi3L;JLollK+hS;qH8Z+xr&RTsk$cR+F3KUh?jf#+uHPLuwMj zKVoA_HBEo`swJZO{iT7R%F)_Q1%9ILe<;r{n#E|xtaA4?uM}Xk_W~tSWN0yEq0lhC z{WGF=?rP?eM;IwO1S;V9SiJ@8#1b>4%Nm34B7oRWC+Mp=|1Z%Cs*r!#*!8Zes@k`V zCCkD|=)^jnLYBt>mo5C>Fm#>-DIJix&wu-t8X(-{yo| zUS~2jg-cgN%``29FSAkn%&^w#HkEOHB~xzeeT-pnHJWIxe+KLKtU-(QP|&j`f`Opu zkLU%D`Y%ztf7{~-%!Pn+ALw@4@WAEaRtBt-o(8(BkVClY--yeh^dOE3daW zzl8U`>)NZTXKvbRY=jaRX~%MC{16o6?>rk1!nP}dZNfcA)@r&OP~%I2q)Iq%tl9lA z{kcY{EmF(JKDKwv?RC(CiQtl8^uug4u{jsWLo3x84nt7t7&x}wU6 zwu71jIfiXK(JlT`FTc92s_OpRMNK_ZpFX$MMMKp}Z2zOUp}TI^hg?@F)8@}!l`TS5 zd)W0s+TK6$wU44^`G&wfZ)O>DCkrF&wo?m7Bv)_xx)bi+A6dWIU;mGv`QzW{pniAM zeW|K`a8tiCdgmDVGR3K&F5&E24r!|A3l88aRyw_ z!XE>;>CHnvfqL2JOTTg{C8gv^!74_l5p`IWpJ(mW6WGzx0HD(Rl%Lyh1$BTyU=Bs8 zV8rl5;gxrcKJY(%Yjk#IRYz=EuMnS?@hQW@b(uKsx6CzcX*E|qqL}PC27xNcP}c7h zi%Q{cE>!@y47>Yx7}F(CQNGvSwO7Hr*CtM=1Jn8?N|C(sUu&X>HZ3tnxF@0S?e%m| z55=cSCtT5zQc)v&B1eqS+x37oi?Py;HXGWj;4zFFreBXc8H(f^2B#n)@R*}TfA`4~ z2c0;)z~iL% z%_F-}gndGq3#1|95KgbP@=AiHR{ioEqS1Yv z>+qS_AWhod87A2^d+lVA{3^PK-YHxaq)2I(R3|Eig}b5aLVIYR^OR|NKr6k})j=Q* z_O-!@9i z$z3iV*vn6|q#ljva{beNmL&N)vP)-zF$oas-n7nN?}?t2e(A5+iaY(cf4pmTQFA-< z+N#NCFD(eCn=IvYRZw=c;az{Ro+2=kFMsz9V7d#lOEyl_c6eB$ zm|7V(f^0qM{zvA3$pCT`MTw>&IY@YMM9R)^{|)NPM<7DO1$=9;BWxwzc!*;tkd-<5 z9!H=M#z(ET4u5NA>dLW5LUc{`X0;sr62w6j8 z(0*x-awbkgye$2BiVJ8{r_LjkZg}E-$8iJ9H>x%7;jH>dp>PgD23A6NsI{nju@JGu zu&^@$;NTX;aOks2x&lg9DB*}7mjS%>q{aEIo%hxpm|bnxozJT|XW1mu&bbA+nhm~} z!E>rw@?z{YUwZ25>iIx--mMYdarb69QXQi=?2ri&#(s5Y1a7_-Q_TvGIv0RL>$TH1 zZDH*4QzS!R0TOVw`J44^#zrT7cuCzSxmkxs>wnMiW>nEi7Fv{RaArv%qtF9$*_)-+ z11CpC#SP4G*D$h!V8>`}kb6kgEFI#)sB}!p(E78Ne<%vYwJc1q1L+!Fh-gKLb~iaPcS-EIdBH)l_BWt=R`lV2Pv0inXL{Of6Gx(0 zQyrNTDU-~e6RiNCo6G|joT$Rp!cldzqrBEpYa<@0Fqevr5)rgdVQPn>S85bTvT{;? zqx$dk^sJ-NUN6lL^S_Eb02?GBGfjh4l~mo6S=Y|s)4r8|Z-?$$r#9PawyGW5LPG(jb&b_r zbB``&jE?a7?%FuyVy7mg$T_k+Ct=1UVaV&HrA1$hI}3cjdaI2?gLc@Clm}{JNL7U4 zNvdgnX%9#4C4$!YK*f&Ixq~}RM4_S@4G=_P=c6xJV`l6T@ zDoGfr{w;uTnQh}L58{T^Mc|DSuoj#!2MS{E#d^OJRym3YX)6eb#OC9t$Mbvtn{{dHGf2?K?(kaHTPqKRV+-%z!{ zRQC-o#LhCYT`k2m=%#TmXlr_0iFk>tkTURu>0=lO|g@@*>=$*O-e zK&AWw+v)4qWd3_@_!(* zx(d7zXlZiy-Az?0=lmt{c*=+9D*Ba9onPjwE0S*zvUZg$KbxE@Cu1j@oMk}!%xHW> zj~Oc-JUV0d5gP;_2{L`?7pC`fIS8WF({Dr{7Ws@fsa|s(x7WTT<*je0Q?P#G~h+q)(nBbX#(Y1z5BF zc52qOckjI9Ku{-Z_BD0XC&7&fBGlH8|s7i0YOqB zM6~CHPE26eNA){{5CV`p+)H94ONMAQ&gHy9c2j*nE(zg|iIe!_(!7YNb7xd5d0{c- zm)Gy*DqC6A2o$k?@Ql>(Uk)j0K7Hjr_>2-Il=SNn%{Pr?br=NeSqfOS4^B4tKU!P@{Py1mu|fYUl)j7;-vd&u-QL z7~AHPgi=Gmu3n4=_!hAN#}S==)G>erPYhS4lOKFLUFKzx&&Z?=U_8?Nl2o^!2{S{9 zN#Er9U@2J_BIk=3>6WL?f)Ax0i^pu*FcPVR(%aFJrA(s_ z1T$=C350$9TK&%=5&+XAe(5CR=Qo>B_oRMBS>Hh((061Pt-c1>f3Jf$Z$En20q7Ak zsHuFbNh6~HG?OW>XfdVE9}+?8JXGP1VzRz&dxx96qba4+?=4F?dD|791*MarY(W%M}d@Rck`SKJLIv%M@s4VBp{|0w@ z@O6=~EG1^#&Ttvp!l7|`cG^gWD^&@#VLar^9w`-~m1&D*V%~1HkjO<3tqhv6`9200 zYqXhFW$i?|KO>n$Jblb7De+}+XY93x$Jt!2f4kftg*qcLrK%UMaVo6lZlSDzm{KwU zo8s-pY)z{+m~kXNnbu)fac0p0%ksWgR|7=3E4ZF12onLNpGY(%?Va? 
zM~0@X0`~uHGluQxIHVgt7XU0SuGrYpjrjP|(LCo>S7S$gMy9!ekAtLD<3-Ku7W+`v zv5udBLqVXXx?k^X_N!@gQEd+A(&xAnGV@~N z3TIrCJVZIrFIa3|2QQjLGWRF#2Q_k@IX=QF^3shKwEp1M>7{P9294(qd=Vo2+oWWkDP@hUEzA<;zwENAZdMXG4(guyjWA_}Frw&_ z2|I}8!qKdpum{YXPrv+~r17Bopbg^|)wS(mppl~s`}-KZzWpi!N)-KIB-6y8kOi30 zyIDQ`7XBOoqY_Rs2QSJI79%06xzOkw;r$*SEjnmuPXMCulL#wo3@|!=MD0EzwMOCd zEN_7TbO5mhFi-0x%lL|*W1}mG?Tv(Y*>3p4knDYD5=FrgGK5nVNpfMLEzdA;P#>l@BNC4#a4r;;9%889Yy5l$r!H}2$*u=iEoANN2N^w z9+1r8e#nJf_LC=7Vhqgnto{C2g`%CX#(JcgP)^y)SQ?s+Dp*Bc6yY}O>nv+XicHglv`f(1C+Rc-^$osDjmAf^J|ph1tZK3mAeIXZVoymx z+k3-68ewvv_!#Rxa_ly%zNpv;*3&Fe>O&4XC@yRaW>Ys&Mgqf zWGqgMRVM{0GSVulN(B%1!Oo2Eh9|1;Qk2!VLJIe;{*;LzuDXuoHMvr%_eefDrp?RA zjjKM;PVO}AoOP(*n49}aE0V1(j%%B9i8~oI;Xn!@cnJ4e>}YeA2_uXVeH9NNapTbQ zuDufK=pRxyms6H^0(XN}cd>U7A$#+;61R5ecSl>@e{P4*hwt7TI^8Yi-=4YL-R9o_ zT7fc`i&+JNkc0UP?%?IS{+jmwyYDOsYamd~-+Q2zHc2O$^fHY+PG_9$K6d?+Yo~!K zIRO0#QUH!z<0l^={FZq#uN@{fudn|~Wfy;OA2E4JUE(um`XCyRsXBu+K;kGP*wx`R z92|o@x87=l%B%O}dV;orC;ot&$qC?uHJiwCU*~*H4VC=r0+`?3pl+Ez8ZvIaDBP@{ zk37j&kh@#*QLwdTD?XZI$Lk?15?UU9w*R=J89J2W@)^h!E=qJI(aBK?w9nBosBY!u z$~kX2r*b2s%?DGKYHn^7&Nb#IMHMkU#J{|NrSG{@_pQ?hWv5H6&Qq)qR}*ga-}U9n zV{Ja9s#+SHt=5J?kW;hnW;xgZU7VY7JYBv`}^$r33tsl6i z^cj28CTC*14m6A^u;F24J!X$-+i7XWyQS+5IP!n#h#1I!CO99tQzb(~{MzVH&R-C7 zFfE)l>^qYcuBp`6eF+BybE#d%Fd4qrs{Bv&u@A8XP^8HgS|2NC%qvR9dXGyHK@I4lpp?k=8zLD(DlcuYW|W8 ziC@@=QuZSsCRIlJDs)f?tA9ui^`o(K{YdFUp2mYDJJpjcsx@ebu^_26hx?ihj7r0 zR!)>eP=gse^Hb~iP`}Ka7&0hIo^AzaR6dP-9Fko`7JYf$n1#LcWG``(Oa(k;Lcc4ZdPS9clG~u^hg`?seLxh5wyL6;NQ`QNAg*%9&3RgG~-5=8X>fB5Qp7Q z$ri>22t0$UyMOcO=+!Tyyx%11PR&(i;h$SeczP zN~uLX&^O!KF*5+&S~;~Xc_z0l|?93Lfo*bQ5-u5XR zE!E_A->u%HIKBhYj2D>fgy%1vn_fcSE5yS;pR7Ix*`iL~r3K`!mjy%-yLR8aK=|RB zmL%I?C>OJusU3$3K>nLI?C_yhUJ|OTM@t5LMmDgreS|(~&&UjI#%aIBk=xBD2pud0 zTBJ86y3ke%X&F9OtZEz^i~^g*Lz`yD5)4RTDL9b2#yf`xL`MT^ z_VrAq*~8eh2Sn4qD+KO_6%PI}jG}*ywC3Nn<_FzuuJ{L8JifPU66Q#&S|kQ4SzoJf z`|<=Kg?BHUywxIB$Zrq*tBGf@e7kMV1V9A>M+xLtR6$%R+Ljb7nLi3wM{FodYTs^N zV*CV(SrKr>i5GKm_iconY*Fz04rzn$ntBs%zw6vA1ndQ!A8-%9dwhLt{8-`oFvZo* z#AoX$qEre9Otj8Y_PlMpTy5@3<`nNe^D?n;{DZE_e1ezfp27tV=o{Qw zAFV$ipK-Yae)09)*E+!$-neeBy36MZH!D$5!;9k^OZ^F#r>jLe3Qx&LZhL@Zm%++c z>H5N__{G%JR-dhuRQFtiP-!7wv-8ym&D`$Huw;XmBd0zh#^fAgi!wBIZfXW~WC-vJ zxH{CASC)6N0FzH_GXu;}FAcY&s-gF|uW1Xk5?I^O))stpkgY#h?Of)Vfzi0X*eN3| zPv7szNF3(aniH@Vrmisno)ATpd=R$S1B!3MmK}kqJox*IJ;}gilc&nL%QxY+=I(kU z`!x-T1( zt(qZ<+g0}M+t_d4eke|FOH%aHVz*9ibD)1V4D{Jpe|oA+d0h>&i}+XweAlK?BBA_1 z4HYU7Z+=M*(lP+QfDiP&8~^#(&tn4ns{cc7SB`oiJ_rEM-Vg0Kn^9*=ZsFdymH&2+ zmos``MW3fJ8O~maHha^T!%A`A2wt9}T%e1XPRQHqT4XdR{1#J}3`Y!rVI{P6O&OFa zY{fqaSpz=`MdB;cXYRs-&H(N$2#gM=O9~?a)8S4IV^rV zS0!#$ZeK_W65vet&n(-7;rWMQgq6l7L6880P5ea%@(*O z1fA1R7lS7YYq9bViufNnhtl^ch6+cd-eh{28$Jq*XIDjU@M@x?DgX8jy$Vo=B41D@ z(NVg;fUkVvJhA{t`F{hPc{x*)`F$NlML=v|4n@@sb_%}|k2dSaZxAJ71hgebhF&8g z+;*Ba<&{5d9ccayP1OLmY`(`@rn4^Xg?arM9r+%k6h%H01*!KOlx;MjhW@v8L6_&} zO$>u?y}i6nu-W8AIyIJg`LpdT9j`wMoxA@9L-XW=27ek)(Tud3)*n(aJZ;K8sW7f_ zk=XHQ#fRDc1K@<&!x(qQtHrlL2Gekap2^EJF{H9GwGY8(2rH-bTI#rpjV>>Xp@ujx zWB~SiAq+}9v4wm?zy1{RC=T{$Z8b4)&yy{c?(mS5psn-ue|?xE{j_ycF@%GK@i^hj zaWP*)h2Os9tuhE#6!NjWP>N~)zEx0+5<=U5DXA4NLEm>8^ovL=b!~Ukc)sl0Xm%R3c7xW>5%WS2<~V2#|Fk1TfI(drR>L&0pXTFJnpFTj(uSb6^OR!= zQcK#i7K+QlU%O0%@%c+#qOOnSzJ1%?B zdqqe{D1_WdjLq$^enwlVg3~y)wX-vG(opY>vZzC_yWh#7pG3Z~S`?7YnsN?^_@-s? 
zTdVK;_l6AMYo3G;p*&xT6LZ+WmF2=*Y>$^{hsrgOu)aE8yZ45MBiTj6JJLktpc(89 zt%WWyT2VV=(R2yCXp6J?t(sc$2qD4%#@Be))cL$OaeH%NzkM&{?O-OBlC6I|!mfY^ zKp1tW*I0I)ERXBl#zbV}PUdd}Zs$D21wJ~wxpLVDT2H_Y`Qib5Vxh1%zUw1Q)`CM5i5n1`&(_it8w=i>=4B!*@J0N;K1D`7f;`g* z@VmM^KaM*Z+PhA_s|h*ps|nnh9y+=RxQ|LKFKV*GXckl0qUfvSSIIN^WeE3k1 z1`cZwTHSYxVn@Iz?&%%MpHpjz$z*al0TPn->h6|~t~Sf=E;9)&C1w1>X<7pWkBJ6* zR7d7SOCl6k0iOd$33cgD z_m1dj9+D(hiUmP!ePK4tyQEL(M9OG1e{5p-Sr}`rvS>J`rV!85Xj%xer9KP(rTEyr zjGeUS0U_Iu6fw{$(FZ9Z1Yu@HP8uT|!UIc!{GwI`JvM@q{v-+24IMW!A>%9Ij~cgk zYjn(#ofY=fqgLK;HCm|G@KN~Ov-U3ks3RNJ)0(q!)wlwM;0l~l`L6s*{)q2&@Lnyg} z%IV}LBR?>T=i5VOT?FU-eJKtW5L=1td!YU6R3&Cls!S3{=p0LHz){yEDz1*_;*rw< z8{|MT^m79#%B%hMGoz-@yd^_NG%UIN+mn;2lx!a)8**z`7Z%2%6qRH9*pP`ld5nAs z@$oLrTf3#5T3vfh^$za-{{BIpSKHG^iL`_+%`dCGY$_paU%vljj}uZQT!3Wif~~cA zOw`q>IikyXP1IF+28EH)`n;UX3c*UBzGA?N3ffycTu8}F2rXv(WNBH+mdKU}P(8mD zdg(e-aUw2AJuLgs{rxlACrJLrr}X2eMGy2c2|-C2b_L&4va=k#V`}tR34HhBuqt}I zF+!JJu8owcLpIw(by*k<-po$s58ndHxPg~?IyVLHY^xL8CGapR;#iw$?I;=iTuVA%= zo#Aw_Ta5t!oc~c=f`UJdej??4JHjK6+C=@xg^KdyIVlN4`y?F>Rk9p}Q$&8T^#ZLq z>73I`YSpJS4~Wi}i_YkL=UzaG79`^=EV=dIYw8+wN@Fu^j+FRP$%(?8w%>8Mop-^$ z0i9`Z&0f|=W<~8ykMI-j1YPpKd3e1TOPyjyDkc8Fdrc&Cm$LVEc?ME(dVb?ubEXvm&VNJVu8uz;@#Jc(7@zI2hQNcJ3>Hg`1PTce?=nGphUzx-L= zCsYegGFSg;#!C^H!^N%TTw&|+h5zRgx2s(6*3RVO;#AjZZ})9>_rdmb{jkGR4&W6U zWy1kw|CDfhczL=~@u13U{%q^SKdTf7{NGH3TKCD2dFU7VpM>bjQ9>VRgA69~?Wmq1gg$_#74TToMc6!g`qh8(ah9Sb&!+usiSR#_ zx)JEpenIP-Y5Z)!Fp`e4vFxE(DIsN$a5#xjF_T_OG^@Z9_3%Hj!5KoO9LjK5wD*!3 zisIh9UNG=unSt?Ocp?Rg)P6V21_(H;Wr>`iHjR?=jg3B9UOG~lP>ACpjO(X9J!pS! zjdu1TFkXhCQz2YoEle`KEo7&5YB7*GJWbz$;=isPN;st7yw(HH!2zVAjQO!$m6YBN zK%YM?ky5Ows7#1PXVfQS+$*-!DU0sf~AbG7>PXsq;>2_#JR`4?MnJIZw>d^G&6f^>-{g7k+v znvReai7sS`w4lYzvak^N`Fu5NbiTSgL^xR(iisqo7$dY&UOU(%@%T%$3?zAKlcE)$ zpI{=QM(6HEB-?0!tHEhm@Vko@frKf68Qe+lRl?w}F8>REo1Kp8`gzmv-;F_f^kKe1 zCvV$3cF5k@GvnAR;~Y6wS=o}kL&z>W;@Erq-rwK(13Ww&=Y3wU`+h#J z%LIAXw@rygQbT*vZQrV^(G;Z$!6B`p5Y>K@qJzQ*f(mk5eq3?JvNy#CR^4dH!4I5Y z58NKvEZIbav~|i2bbvZ&BRuO{Y^7#>otlO6=xu9s9lu|BPo_&%iT>%aEB&;#>v0 zM5SyUC+&hHj)D^#iZ__Dk8cI)_2b#Q-8VD6W$Npu4vb|7tg6O0zNs&bcizz^^Z~$` zdDY^sk-31KF5$SP^|ffPuMS@~2Sp{QToA7swmW;0QktfW=8c?Ut97Ro#MuVuDk_B7 zD&YrBeDaiI68f)HNO*bs!|6(Z1;S^ew&n+Q#`2GYyPa~_?mv{Mciwn0tkKo>c>iq8 z|LR4+sdDSZ9PzS$sesw{dTL*&mE_yjk=V^51;>m(XB!(dPObI&0$hm-7!-;b1@GjF zZ*+e^HerdVFaDg8IC(FgJXfn$SYw+(O^*XqTN4ZZN6q9}P5VQsSDk?EB6y4< zkUM%>($9Z{u65%i%doNuk!!>Rwa%zs7(aV-=n0joku@ z%|Yz@iVT-~=4zG(0+SBci(@Qzof@DaU`RX`aLAm;JBrOz+S+Rw&(CJfH(p;A4WGBj z`kefJ8L%%Noew}$i03vLNR+WS?U<;$x$NO^ZuST54-tS$GQFM(S$d1^+3%H3T{9L; zf{MfkhGd-@>}U#4_kDhxD5rJ|7DmsjLIs9%d5D{+D%@di*fNcQZvt-he>(VYSw?SR zR2x$S)vozn9}wDvlLpNYLUZ z`eFFgp;)GH;7GBOQ?Ia+B;8f*p?DLFx>rIT_yb)4Mv$>Tz$s|aqvuU&bRtNDl0U8k zAUA6Cy&8rQDXV`CwSoc`jCVCtlt76H?nfcuy(96W|K=0CWPLbXa*kZ;j$82C(eHQ) zEe+LEHb?^}o^%dhe6DOYMPbosv?uY)=PCabm`(GN>7-jW8|;s3iKSIJAu7FQLR?%N zWTB>|t1E^uU48oB^W^$2+tyeO`07H7^}XBOvlO%i!%WX#Ba5);>3Y*HCVCtiiIR>B zMi&b_AGn(HPED&GcANJQM(~hFfI`LS%@>ZdU`~x5WF=UT>2U&;Sqwc#I*b7k*yoU} z6C$dvZLHVxs8NPS1 zcvxyMD6M<$HaFal){Vx3&6-3&Rg;^h`WjpEcaSq>Ne3>iH?uS zstJx?+wts5>`$w}D5w%1O!`O#4&C{2Z02HVU^sD~LHSc38unO2UQ#IX+j+bzr6#DT zgG^j}6Ickk5$B6DyM7zLZ3&M<3zXI<)!SgA6wRXYEkL-R%Xo;n_T+eJdf12|Ke)*i z5t0wXP)0Pl%DImD%D4)qSgy>j`^KZg)9^M7qXLXo;6FcL2d+tM>n`7-qq`*}_}VbEQTi4L#Lpf21IdA4 zHjk($*jiDtMpvfs6k%^#nF}CPzA+_zQH=RgZpVHhN=F|WXQlwnzKeG5AS-<52%~H z@2*mIYB~<4lSdu1OP`!1c@WuznpqxvQ9|Uh>D1J>G=18f3eT0f{@rf<;lA{GLmW86 z%dc@x#i7yvpR&nMS1?0}fSr$*SLBm{)#p>^wJe;J1vKpPGqqJKivVWcVWz>Rp;lMP z)()A^I>`njp=5mLeGS#j!mHXn)Ffxm==7a&jMLEQh6>n4t0f&PSd?++57t&qSLY|3JGi2XXd18BMv| z{9$D65&=?T>|D 
z4|tRtm6hl8qYD+uKn`NUg@6IG4k=|dKn~TW=BxV@GXQyGAqUdX!CJZ}@p2NSZyP%7 z8Dg2!^HWfn?~$yt@xCf*1F9UVk)--mIw;$+=9rI}v|!^G&7 zVDk}u^>Ey`(qttCM4b7;SHn%KT0ZLA=D-Y@gI<^`3|p2{H5m?$kBHGiZ(ten2fkzM z9tN7?D@(OeZM^0XZs7(~tAgm3S8Lht?)OFKza4TjPE{Q-+{6=RY9A{-l7IU;Uk6z~ zf{RC6u`Y|zs8EAR(=fUz&=4kRR05GYlzF+I+K#npN_p0vMbTmYHiuNE-?xCij?5{{pMEdfzwqaV4h{_+B%j!dF$$f ztM0k~yQ5NIv(o<8*!ZBs!iy0)B!^jD!<>fT-+U_Cr(SuqlxHw&@VII% zs>BZ#ArKBES5O9e+TPogoDv#>Re`$^mlfd7-96fyEeq945Ke;)kVxxQ+X5mi2dH!9hbAHlGj4gqi9fIty^N)baxNAZ1wq{`>8szfv&mD)G^*j*@B)a8$v{PnXI z7~h(a8a8nUad3$zWE3nF0QZYVFq7B4^KszIpRF4UR#bsRRDXH-@+GO9YC0zEWy)HW zwUErIw(N1yg5UA0mahdA+M=Vs{9VmaEIoHZIgem+lx)77P|Cx>+LwyV64ZOGJG|FF zD=s%$_2>BY_Pz1nM)mLD?jX&*cYAr2!;gD*J+8;aP5e%5#&s%$a@n(4hy0Hz?#k|V z1_$FraP=+!k$~bUUuT;<_1e>f^Fto-mOaKrz(TN*k{uRhhfqq0T6}~y>CKyYJa@4Z za7Bz@p%t8y+cX2O!M`Hk6UcfyE=j0g1fRyxxtxq8Bb9iM739&xdnAN1bkTk-d!a?G zWhj+7oc4+6)8z2s79GiN_qh*6uWgcGO}oo~Qd{>r&DLjHPkL>jYuzQ26*+fTPgc(Gn+@a7GPxYjw!9l< zp0tS9jVQ)a))>b@DfuV_(csRFgAMOX?}+ejy4S`{K;qH1ac-JWy7`1!cM!Ne$D1dv zj=VCdYiaR!zvA;rop%(+S1bQg_MW2;OKAye)a|{G(O} zoa|ouoYL`)%WSq^*8FR85PpT2k@Iz<_^D~z zBL11Oed5tzu2vs?0*oPYISGuk%d8ORLgPybq>(@=mV>B3z1_Q{+#OEw^6QrJu*t10 z%1FKFV8R=G?~5TQj!^!@rM*t|HO$nZ5D5lC=j}c{AuD4#z+u=vq zLVhd%<_#trqx&;QaK9td%my3 z&rIdRwsg6@o3BJ}szuEt!SisVbBV$!=e{hL5{D)eLUM0$w_V(-#HxAD_fLC?$HJ3U zSZe>&X1^Y)_)e4W>Dk|z7g4|nPw)9(KTTAj0+bt$6YCTP(BC??Rde^u($Th;~~yJ z(fgAVjNB;gpA_nTK8?;mNmJVXYXRey{dTQe@~p_Jfe@TUTWs{STwM99r1ed)aRY2=P&me z6e^yRG|KrJZYHwC&uNC0LFfo%p@%GFpNr zz))p1M*w=52aSSr7ukT3zmQH7z*eDbG+8crxYv(l=&|B_J(U-+`4}BlDbUw29c{Ol z+EbJ!i9l~WCFLfKgXFWE$uvY{6}{&8{ckx}Jk!#EQBiB{Y$FSSFW#8h!hb69Ri-EM zT+PhP2t>@aer9w2Rn=!LuBAX?sPbUHBL6}|BwB41>&~OWg|)C!0C7RvB8FF;q3$Xi z^POD31vUSC2n@+}Xt;(s@J+r%u95}Da0tuZMhL5#pSVWSEDDl=L*o;VBfov?0KqM( zlV8}ZJ%EDE)ev1GdDEKB_1vM5b4{QQxql zWnLDfO{0&17p}{jqPo()u!Ef?ou+HIYPam&-nzI@*SRnK^vE{l;4COizccsQ28AnB zcjRQh8|%Hd{%5JG-Qzx+Q^jy(evP5s<=J5<*!QjH!9liAilU!ijby-~_gy$BO;TOc(?*VRFUR|9U!EwLqDm$DJkVpWA7tP zEeN#p_k+%C>}yI08l{9&q2$JKLDLPWN#GRpeN28*aZt5r%L2CS*4O+tprx(VFplip z){9pGn@*San@%PI4mM3LJ+9yBrriGyRFM5Ic>S9Q1b|G&m=RGnvQqhG2C(%I8aP2^ zhXZMunVa7YgFv)XHX0u(p(+N#+<6@m>V|)@u&)5{U`<+^5Cz;n@PrL~`OH@6Lp8H6 zbM{^S5)%42rl4YBzOeP8-d_Z}M)7%1*T${LI)|@H{Yy&)x7|eIGTYmI_J2W?&Tl3E zaQjQc52KwqvXV4u6Afh4=3vY0<<|VK>awWyVx~f9Qc*fr=vH;)N zlh7IF?*zSN-T^a5rS#9J^I{DLtbxys4d0F3)iZqM3gu3x$GqW!5uoi_ zDhM82J#u#qexEP}ad-3lS+fMx;k7b~oLVd$PTMGm>PZ#RmGDgV(*z;f_MPoUVw|}< z`EM(kDnrhq@MHES@5@o9s@E0FU($z#UN@71h9-_tyx(k?8q3AegDE8hkr_vh{DzD> zx>O#;Yh93tL}v27PT%^LW-92zRSJOZcG~Gu?|XJo?7BT(@=}+4|Fp@e@w})A+4Dly z>uPbTNd1xZYWWPRzDHPe0!miqR?9P+r1sS~*ZQm4=bmv2W^)eCdlZ1lM}6$4n{nD`tn_Wm zE{uPey(J}RNdgc=KmD(p=~03n>TkDio4Oe7&A>j!XseX#O~pxfv_JAxk2fSYfNG^bzqz;W>yGZsb|48cqOrmW97 z3i)kwvVu7j&Zm$bC?C<6CKBLZuOG@}UdR;r2}aGC(L@J@6|C-ibur+3SAK<+&JJeA zH1*E^ZA`Di*as9?$n58QF00XiTV@Vlz*qgpm{^cG9sDHI)z<2ZWJ{Jii>XYxBWupsD)} zpDW|04LLbh)QoIW7C+gYIYB>@AH?o}N~C46Iy$Uh)%f-$TlZ(z=r?0^BFS>!xd?yPt<48xRCs^mdUX^*gb%rkoM%G*2&1u7t z=%|r5>e#6K2*l88RSSDad?ZZV;?zg5P;~kPJYAkUxHfcZlM(X;1(&18q&==;l7D+! 
z__?Lh-6%jA>VBQMaB(E@qzt0%4NQL~1yjT~qdO}Jncr<)ul{-+Xz$7EVj7z-*4v5V zHMq10Q!y!bjf}+|TR^;Wy+7GsnJ#n-b3n#<)7i!4{c`KxO1G12i-*s?uhrmyA&2ko zzoaPw!8Rt}p-7K0S@g-XonDVirmKM$BG0j_o&7Y#Bs34MM?vk1W)JU)fwhIxA-X(NV!)>^6&s^88V z+IuDKmNO3Xene>nn?76nqRQ94D_7u#yCG`-@^M!0oUI~F{&t%$jIC8dv1aTW)Cx`?8V+mQv&_^``uqQ8=5rn6)R}VfMKvZyBxD+-8KcD)gK!l4s;lKqtm3hr5FijX8rr(!nY5w?o@=5G z<$XTu4_O_KpSfr7U^F6=#dLMaQPiQCN+5;^WZ!qJB#M&E(MdeAnCMeh1@e4K ziy`+Msb(RSAh@^eGc`2QtPGuRlL2PfM)CZ(RY$&NL@kN;DB{ybtH4E8egV<$!PS1n z*LSKNGP@U~0`GzqV-VIa{oLm$Ikn0iQ1=wbG`=A%Xi0P@cmXHL#B~3A_i=iJG779E z`0d;uHn&TNa6kG9dLQBP`u1FU?ee{tPa+4Ood%H+k&)@#(X*-~AWAu?(c6f$B^q_D zP!-@G5MwX_gmhUYHC;Ni9>jj#6wiIy6tLHE?}r?#*w|_pD(|_Ux4T8ze{FZC2N(yf zs=RjpaX(VVm{waVk_G32EsJ9(?z!ha6fvg_3gqNYcv;F32}eZ0IqKrMI6L3;#w+B3 z$T{g1tQ_n=iXNHkbtvb)cWoEO{bq)BEHj*{gs3zfI;KW@ z(Vncwil3G9_Zz~t%t`T9Vio<1tE>2O>*+LSCYR-=htxcYC|**9Amm%lz{XB6=(Z2B zpAZFYa2B1I7;jrE4TnSALQg@J@q>J zm;Si3&DP()WjBxTuT8ww>*`-pbne-11?||`>e=F(`lk|-<+gq7h_%Pa$UM3}`>%-`$>t7c4* z0Fa*p?|HR{1B1g+ZP+H%)US;@~xCk5E7fceF(Cop0H%S^*+%$cVVx9xYf%i9i! zhnUGI5C8Dy+FI6#^{tyRfwkTwht+L4(G)zg?ST~8L^C8HfF`VP5Ez)~kl!$kV&#VR z<$nqRBfz&&a4lOUDh>>-_QAwPB>@;Q;G_Hb;Wc!KVM!D`=soDc?(&TSQ;3YGf=<_} z_K0m#88)4nd^K3~kwatzz3Y3q^13eIpi+p{)g}&tPluZI{~7rg*{h&+0f-3j@?z>& zMOIA-+>Rn5Rm^0$t=`>Zmwk#AdC^?Y$`dm8BiO_6RjAZmnp{p0Vw-v+PL)Qg>}?C# z`ByCO!!nu7v|~39*~9G;VvGMr2-Gm7_A5Rh8QuYnTdPBJ(4|?xE?m= zFvp`5^tnpSaH#)dvN>WbXMen>>hD#9=zLKUObR_Dd#T@>&<^O0#!6Zy2`q+UkJ&QS59(#06PC{=Y?H zF@6r8e-eIp&+E*T_ow-e4Ul|vXtc@DgaKQ#@9s18OxTea6$L}PlE+rm3v*;dznGCn z1u1D;_7e0U6$|K;iY?!(RNXw9l4>9>m60W1ko95fM-jGk6v`#8@%|Ylf(QPEu+}HqkTIOnEhIE?y|=lbiDwu!GFlk0?yvCMvIDT3 zRoB}e(gSuDl>-i}ug0$b5h1N_kCuBMz&qP1=2T?j?l*n-Da8A`>4Vum(@*g&|a zG8~SJ7;Unt%+Y{9j?*@Ch(L6i+9To~JPHMuA`A*kt8d8+Fz(lGrA?iGexk3Qv_Jox zTk^q#0_tzCvfXC5Q`707)8(@Ty1a=AO+!`l!xR{s16K9;Ey*c`LYb-bQ(^4;%!W#TY%)a`#jBfEQBcGt(`dL9^% zI~f>@B_C}s9`szSn4C#cvvGa+NAkcFc26~9{&G2K=BSq7a8}Q*0heU2ewbVdMa~?R zjOSqqc3*XEfWh^95BP{n{Rz;94~d^%X9}L+x#8z~w{Da!82yC3uF?3;4Qhd?QKknt zN`wpND@Moz6jaW>(ZN_3!M&;@9?QaO<(M($?blChEc*-CwRjlIW=>j~rDJv*Urwf0 zXtn{bGpni#;`IPat8|O+sR%&CeOyF0RPH1wd$A)gr^=zdXi&wfeD?-NU*47m`)n)V z0twsRUE)vA>{umZBOZNjt^{a!LP6yj5(|CFk2`kZ*fj2l_e0uGAt`7fYe9F7?P ze8KnRPq-ujkhL(G_f`73?E=A(_(h3^N`Z><^40zoUzsANM=p#;<@mox?y)_9fXrK?JSe~pwDJ+>vzcLn$YW(%cEBl)0v0kP3crY?ZUKUpRgD*c5Rse;4-rC z6&X<}5Q|X2$|Jj-EtEjGEpojTh!z{grbueI(>!LNUI!fvIItG4Rb=8r8Wq7+P&I%g0T%UNxff zZ&ia6qY^2xl!7FEWNqpyJJN0}>LfVs^VwVW_9NGMBT~5(YwIQzlug;rymh(c4Ha(1 zmQub=2hRQNR*x?<0UhcXcHSms??_58DfgN*2SD9)erxzjCxfXFD^tjNT3#b&NCN7H z_)#hh6qj4?pJst&PIfNK*`xA5WlDlHrf-(?@hOBP05Dbcx;BLtD zbNvCwcNp4PMI^XPI5~E7IooGgF@;_D0?)31qoyVIt&A{*ZyYDvXV&5mt>Pe55rM8= zH$bGOC?x}h4k<5D;^0>;bbbS{B%7f#F#!TV6i!b~R~>xR5mX1!23}4zpKtO<1FBC8 zr)QM()~kx8elx}A56YOMg(PglX~8#V#WWp zGKnen2Rb|+D;GIs#$p~5DWQd$zk&Omud8iVT9d+Typy?Um#5~RDaaLPucnnU5i89} z&0+DrQICh73Z8Zw@r<+QIgf&djZ7}EZ)5Ze-of8rI6rl zx~VkL>g^KsCo2_K>wv3vnyw(84=-Ud&|g{Cc*mn*U!!Y1UC!>hP5osYZTKKqN6Y}v2!69j4HL4lnF zF+3~pg6RGx-~u>OD%sq8MW$b-7kP13EB&-#vUVno0pU2ULH(y`($HMZaI=!oZAOa# zLaqBDatSbKB^tvNe(L)NSXXMi5K%J^K>)1|L{zSQcE0%=u7Fu+r=U*wW%M5*$n(}q|6hrd!>WKUu09zg{?+ z_W^eEwQw4cN+jK_7u6|46rj`jbM_QPMCM?c*Z$s9MX@f&S6_|T^bQv<#&v1S1-9JB zI+q{OzM6EK6NOozWIEC!u(iNQ-ZAq-LM3cZIF_UN*s#x$sb&I!$EWi@=ypCpQ0Ju~ zc)uGw|6d2vj>)gEm#$bn@aNC{#!-G87S(IN1+7U$-?K6Yln+|SIL?R!^XbwGmn?ev z*fFfxGV8=^h@wrOz=_4`R&8T*^TR04)EH@^ZaNZn+kDntKIRpur^-pI7mGIg+1aTm zQ{j?4HLPh_Tdk8~z1_`(8?bp)xfym*vNz2! 
zqgMzaQ4l*}BrW`j=3F!`zWLk!h(|FedCXYn2DAfg&S~c$D2pb{qQb#2g)&RvZ#0Ir z^mBEsns)?oRFH`Lr9G+YnU~g2kS)CQxwvraN!rY@Pws9p`W^~Kj9vw+z*UR0_uqcq z9ek0q)VA-bjy3xe7y+cDs5G}{1ZSsA`mu{GEWgC%FC_PnXQd$lt93RpUWXgHGquVT z6qbN-z?i9k>FGZP%@s*TfdU~3zHd_I{Rl%Bqd*V3Jos@Z94i z8o5CEw0Rh06fxBk)Fgru`8u{ZEy^~dFMWA)!j!D)NPB=OHUxD!~=NK=$y3HIHQN&ibGP}&zeAKW{@{L)ct*rl@R z)*q5^a&=STu_m6chur1%^WnXZt70L}bqqvS{znH!Lm)g)` zC!RT0RTcfRBE03f!lm%QnrqqNeyUK+h~6f4(Q1adBuJ85u;fYZU38{DQz&GfeBn@6 z7f1jhsiA!4|ABy1ZB4v)i#~~|!DBaLH)Eo2QK6n;C|ExKoNfW=LwH@Dy$6wF9+hl1 z82O$zG=0q%<$Lxrwc>v3?)n+Epu$h_Jrllny>wh08QM_jTwq71L zbStxSWTdx!e}8}QY;Pr8sO7q?s&}`b>T4EJ*86amFp`?NC3`v^p!Lf2`d^1(@x!vX zjsBVF%$i1z>#LIfeV>GAQw;^ZV4d)<`gmrQ77(!pjqY+xhsF#9rzj)pBdAvSq#g!`n$BR!ld4HaOn9Ns1+Nc6(>ID|AtjdT^(Ql==$O!utoUahRx0Rqs~W*4i%&Esx}{xCd`Pdxa{W-x8Mo) zFZvEM+d#itMnb}NwZG{|43(XmtNfi%?91{XNH^5uKKyYNaB!js1a7+U8WNsT+oh~Y zLmyeHJjQ6iLBDBGC9Id7olU>Lvck>h2Uvq*mVo|5C6CiW%>)B3K9Pa;66<#(7OlXg zNlxTa!rbSPr8VZc)PH5ghi23jsXBI+F`brt`lCXhIMqF`hux&oEcM8L!WEv@$9jA7qu&{zguMX%a=V&Ld213jZVsdy&8Q1 z1$f`-2sb^fQA{@HTQXXI3h%hS+<)p_J#Ryv)6AkZW{cWyLabj?Pj=H+8|^7 z{EM-7Z{G^j0!hBmacBquMiNMg{SnE`LGhB(3Onk_u1!1QV-D1u9a?K9+~>p|Rc(7t zbBC_q4jOo*3*^tAMbXnIKs~LK^?!=}AHopQJ9=Zf4-&{eV6hB&T}^-Dk#hD>b?k^QEPMG?0Ss ze)XJp3yhqM|4q#&z8lCBMPuRm(Tt-bo|87)PSVtUMN50mv_V4V)>WQ&DZz97|89Sg*E1V z48+j3#6>JAiAo5VA^#KMJJW4tr&6V5+I$rvu%DBXlR6)uidT!Dlr-FAB8922QpG~> zTU|gDAx(3G)X0b}r|ER!E--}1y{wn@^Ya3{jJ21*!HF?k8GxOiWG-Bh+-UVaB$N}z zf&7Z%WDSA`wB69+NXUo%k0>czCHGFSgK4t?jjp+rQ|s?U!W*}0BhB9B!OUDx;h|{`h0NYTPWON!}}&IBJtzs1YnmhzPAY^w!JG&GW59jJ)y7bc;nJcI`gV62MqUU_Hx8 zi>=8@OpFC%8s|_rFb6$}{KWgO#+li~%Cr}Jl9J-;8kEH~VrXXbtorUrv^6~Gq7Zfxu@Wm2dhirdgOQSJpb#{EGi*SPE1wB&T_y5} zWHnO7?jac|rOx!dSHpJ^N@as1_S?-1j#7~P^v9C7g*>iL6LNWFsm2_s`UBHnCKpMz zZk3KJYAN;!6}^S3IwU73j3|?uV${eyy=OskaXI~Oy>vJ5HX^#b*CL^OlhX<$Aw6*< z1HQ_!M_%)T8nB48XaB&+xTe7xv8AQ-+~(ZO$Tu&DR#wC!AbLzL&J97qrPUDh18X!8 z{N@vl_y+6`c(UrPi~tZ_hY%*`$-Bq zDPo}#G0h1C*g%`D?xf|U+fcoKT9>m0G!{Y200ME-Lw~Go*v%4K_w}>9Y|kGHFOaKP z{0x#kt~1x!dg{D4Ym_qKbL^!8(Br=x`!1}}tvI&Efxd-wF+n7;oRmQW|Jt!=!logA z|A0bPP-;F5c$YLZG{T(i8)Fgpa(=s-EPAzv@3V;Ms%+7f*?M3Z;c)(QIbPl7xz%9kb&OiM*ZEH?oV zVc)*+^_7;UPCX6MUR`4`$BsafXzpv>E6~gX=P%ehIXWi3P}KTar8xd`ru+d3vlh^w zWdJjoYhuKx|AW(WfWX$O@>%a+Ac*Q5&;Q83RZ)d6ksiiP%e?+-j^S4G^YZg?Kl?ol zJfDt%()~3{>-k~p(d5-ebZ(i+CDDJW;$pn@sa2yauUFm>nYNg$=0v0Xv%@c>r{1g!!E4)fKKh;g@IWZaT) z=3t=fSKfwl8Uze?){;uF1Tb^<1+p-X&+QwT#7J40s{n`1PF^%B2@6Q(Ux1d|qG_dBTz z-wx5hyGKYg=LPuSU}j16F^g)QQL;5Kz5d5U3JPV^#{W}q<)Iz5cHPctv+<3?E$qhm z29NGWOk@b(Qq;nBy*4%xxJxl6`ky04saTZr3~1jIgh9T_j|Sp=?90kGf$3Ow(XiSY zYqu84ICb!cI~oNt8~{XjXw)hBJLB*OlwG`S1|EP%e9RbZJ_bgVfTKz1Hn$3=t@unQ zCMd#ThSO|JbfD?4;j4^cu|%x#w#USA{nNQYAs9}_knF3gXdi`X*EJdy-T8j|)5v)` z1*ofh_|Fq48BzL=27BhQ$GJ(ijCDzT^v@j1=5SBf%pAHSKaz~c>M=4QkoVCEn1j<+ zO9OsxuqDqRR3x8_u{~78>QUdqg%2)}l)s7iIl73&mC@o6hK(y`G=G}ZhOap$K zGj9})!a#8lbTV&plR$w?AAuEjNxqyZ%Mg2fyF%HcGLyTKSW` z;bGErY!oQ1^PFtpUlH&D*( zLVPe~YFv0OrtxJ4m1HR%5U7yVOd$)Ml`^5yrmlM@)Xa(-;bb{0yxrwjK;FKdorzpj zp#;pr%fB_@HO5ZBE8Hdn4eNVS2n!(v$x0^ZTT(Tqcv4i|{ui-pw=H~~&E*Nfz zJ3WHyv#1#Tk+yBPDEDKD49@X!jGJe*!9oh|a&hddRn=KiZmo4|t=G|_@0@$4`}v=u z+>6-OUD>M@lhf;;ve!LQ;!aj>#{c;_Ri~ivkl4qkOtfv!i^)iDkgZ`Sn1haoRwAh2 zmJ!b=%^VQ4+Q#BT_5bSXjUMhxa)x zq6J=(fGeR3PcyzKcwO7QaR(>HfJ3{fmx)Y$gwZbHeTg_l#^!^|wzfZ~r(P)_Na$mw z8~Gzv8F+_ADkvVi$Hvg~5=Aph#h8`{6VSR>q^cSTb5Viq$H`})Nuk&YHSE$xAq36? 
zC8K7>Y)}8WxW1%vnRT2{gLZwwPT*l`r=8cmFLJJx7wDk&;&#BHKSm4A{VI9B8=NF} zLq#2qw=oSu+G3~AB_vITGb&5>Q*XH*`2d;3=N1}(E1>*6DGs4{^e+>y3WtUW>q&|8 z;Zpj&Zjq_M=ZuV53hrlTyBl{yGVv*BOJ**_8B-RGr1D)!Dku+e6aVz-(=2>3&`uHD z^ky`lt##0XWR}ZHNPNb>TY7_?pPv_4i{9MqZy}P{)ac_OjpaTEC;xFa)Eg@=j@trJ zsl5x|`lmk(txJdMuNtU6N^P|b5hgO7wVv+vgC3ZyogP73X#ldRKr7z^_8TRh%Up}o~*=K9P>&L z`Zt?wD9pCth+JYa@o9#2K{^n_^v^q=RQEvtG;(s8n!omS8dxrRW>=PM3FW;T?oZtT zlu4|HiaDqLc`8;5CT_ub!LOH3huHIQ|K&^HPsAfgqmDsfnV%-atFcmtkK z)RQJgZKX3G_jlR@7TEO&3{Up+Vk-TQoDf991Y;;Q)MDbeg-)(R`YfT5>mufA@cip_ z{qyFMZm5EnHGeJp$Noy%HX=)0BZ>F1(>|-v_7S-ksUDK0AkB;Q9NdhBaWi!P)}Cj! zPJG$rpA5hdY8T>`Jm0<3Pg%h?b8{BItx0>~c#xDSKK{2?8mH6h;J0q{tHH)q|0P8g z#>Y>W<)lx_$uOGXWM>^HBn)oV3O~-zIzuSlpSt~kQjPw>jo=uv2uheoF-#7f59!H* zouPv)2m}HwMoteku9%1IGe}3G>>fTk82eSbwLI%mAA^|C)rEBaiY;&j3FCgyCDF`7 zlE zJDBdp6c&Cw97p%EP5oryV)5*HM@VUHa|Ny9=W_jTX5q@Q@L+_3y>GtRg{-upSo}qsumA1_ zH@MDQbw6^%9(Bx21PdSw z$2Y(dR1B9^Ptw?${r!DT&$>=`53c_FaX#%@dPMA6XX~iWdO7e^y#TydyP138ab+ zAnp}U*FG$@e2--aREZ1)tB)P{5oh%QAKV>ysM-2^mi_x1*&g$|ju-2}bif-aKiTj_ zTxqSW2r+>re}8~bG4HQjPLrc<8T;_gPgXq%{?DpPQc`lx-4A7xbzxuo!_dUoc(9K^ zoKdE`edYL`pWLm>*_xH zCVBOOGJ(@NIeemFOBot6Dm4bw|6R3X=o!}h`4{NUG4?UL^T5wJ^kbyi3T^HAMu{qJ ziW-vQvHXvr5!gXl8ZeBAo@IGht3k;MJG`p+xhD>31Je+=_}#vLqY_RT1FD=+L*#2+ zw1Lly6>HO#QqXn}>A79MkDF<&ZF`b+j>Cm^mmYe%)rKMMo7l&_=V`>eT08o zjK79|JQXB`-WWRNUNKB2Z#+e=1u@Yliw*oU`t>1aYEurhO2(D1e5V^2x2uc5oG9QOk|* z#JR!b?8zhZ-4KJjmh)VeVibr1!A6At&MuAFTF{EU`>zk$leg53Klq6|5!%NquRH<< zE=~NeFX?1eN#kB4~!K4 z5`Q`r)Wmz!3Q7*L{?7#k4-13=MolljyFr}_2*n_z9r-|DVC!CNLn&i)Cy+=jvatm& zCT>k^O%9C2fW1WuqCc2k`Zu=*|MdRZa<6kps;qLR*46D?mgt;j^ph&H;*GXC1Om!0)$5g2@i}yZL5CMRZ+9G-=A^E-atHEE4OPWYjl0~3epoaVXhx)w3g^o~I6ZU! zCVgwv)wzm;mD%uWtLp0Fv8^UCNQ-YKENsE4Xw4{4DOLj;qDi)Mc)cXVem}?N)w`J- zsyWfO)F?9pgUF2!mlXkjDXue3q7gS`Il{}E=^0k{4UT9d6_j4~Xkz;$i>d)0QYhlq~QN3I;()Dy9SI81RU|o2mu8pq(*m_W7H@a-AGDzcS-j^8fj@Hl}3;b>5>70 zq;&W9|8BY3#dbL7iQiNB-i_PxzzT-LKtsp{5PR&wxH>3{4B}EqMfoJsW4VcWWaFlQ z3%NEr^LOFi7vjI-KwiK2p}6?l_ZHAIgo$pN|4J*QSPNX2*XMi!eG8^xKov)$n)K6f zo;`bMHf+CQZSisNd6Qn{Gmh%&R32ik;?G6tGo#nn*Ao*&(t3PA5C+9=)aHc|iQHZA zQv>!bZ)6e6jXjq23%8HvjMkR?Z|L4|)sy=4yD6Y4*6&f`XlxeCO+&mOY!Lc<8w6qo zy_n1~W?QkgKRK>I4L$c9Zy5TU0rrcP3iC9`AkaM=Q|zh?pO-8};(&s%onwQEu^Hmn z&(3G`BWV5J6c?2Xyl~x}+-N1o&btF3$ip29~K_Q>K16ya}q>?!Cg4x1M>>UWdHf?_+6+5&s4TBP&s3U8MEIfTcuC8?!7c&^)<7zJDs%8*pDMH*m-} z7fa21WVO(Dd(u`@;2TZGTZDpG1mya6qaXj?|HJnLcM>O?4e=WRVHaHI-sQzN@#~4r zoqePuKwv_YV0~~f>T70(*;(&|o}uPXuHHglEBhtn8MKzzWLOq=!F_3Jop-giy@eDz zU?)aZmC!0O`)QpOr$E-I03%2AM^KEecouuB!>>tZdJW_|y2GMh+1{L4^RBmyT1!`!w}3&FKP&k6|Ud%*qyEE>unDU88`e?g?C zfE|g+5WT!rx0ex{h#W#b33`W3$AeqU0Pgs{T;d#yg{-)CnT^8=k=r_0?!3I40?v$Y zk5iJz>z&t>okmZ9V!+8mDDNlYCwI%8x2m~q-iKBJn^s0*zSn1iW^kh_vaA~8Vo7(# z;z-(FQX(mZL4qKYIGj}!Gmh&2*DjzRX^Kg&Sx;F5yFhl+gEpF);dvzhth69~d}~gd zoV#zv)Uv7?9blgNtb6(P59Msyk~=vaTf!Fk-e}-H=iQER*_mL+c{_0GRB}Y$5>{rg zhRfD}^>V&8ZAym$|{#0p%*{f=mB;+R!>uqW;9>WSs7MMl*wtIf0K+nKo4q2Fc^gFmba+V$Dq5aN zKp2X{xXHrjb0)ZXf{>Fh&0ZM>)S5Zj@If=IonU+yXA*HGh8C% zTBkn;k8QqIw(KjfJOkBclH0dt=!<4?29x&&Q20QB%xYD|E>MZ3`-}yJ@v7TXhKB$} zN<7>zVH&dJP4dvUbPQ?*+_LV{ojR3uL@=Dm()Y34X|o(;_-4~W?p=Q0^qy=g`=cN+2^p;f4mevDqA4I;ySUK$V5a=lS`>R$ z1}K*lWWIVuT?)xLg_BDATwSf%d{^3Nn1u#klDM|`U04G)CUxgdTiz((F7%Im|5RiA z69m*hxHkV*XnTt$64%V_#S%o2M!a`|nj^^po8<&qfFfL z>5GesQLOt^-EkvNFnoe`HD-VOgx}eD;?qrN}Stv=%Tp5cYhsMdi%LxTj+p z?VYCW$ttT9eBxaSQ2%BZQ81%p%M}Qmz+tOzw0r(2c1mQ=^at5sXG@`t0+ zbIvV{IqX-76Cf%QGN(e#Vhc|-HUD0wiQ}2ZF3J}snGJ2TTIQ7$3>57Vz$;q8_)uWu zn<%o}#$$t%bWHfL5XnMmx`9XldH<6(kAp#l9n1fvMun(H_oYu6OU3#ucx!b3)73uU+;gtKQ^zJBAr4?&LLQ<6(-S`5(Y+4h$OdU?$9DMvqSZd(3Z 
zf?$?LM%o_|YFR+n_T?@+&d!oH-Uelj$5`*qE@c78Mtl4vNWN#%3+|}8O0(F>ZTT`_ z$f1Z3BRy_H%bGNn02Tzp0MUv>0dICqGAW{@xCc80RXAqqG+r3u+LuJOs)!5;wP^eg z8qWbYr-sQpXNo~6i?P4TGozFcCE_De&Yg0X61NAqv~bg?NKAL7?lj9OPv%P1KU3t| zOoV7Z_wPA$wA(G!g_n+!{=b*eE+_SYho!5Jk5`Yy{iA_bS-r8Ns&Ey(XS#vixkHf`em@)IZ)0^^S=_Y3>4CzahMSnXi_B)EVw{-0ITu%JIs9Qrw7_T~Z@Wynd8Pqib>@b9L zl(!5JkD?~Iyv?Eqg%96g^d|y5CmaAg;N<3X_3!8DNNr58gQFwv>Ex@+EefE2>1LLf zZ%AhXgDYk1!BKL(2B+!5&IPE3>5PiTOXDH>eDRWRc{V4<%wbYc3Bu@&DIMEUPlAP! zQX&|$bx6!e;_)>Ah~7JOJ@70W@wLbQ_ni4w81WlwaKUL7!B9$SrhyiwOGe<)wqMc; z6}oFa_g}H1_a3jn1Qn8kxM zC^tRkXH@c}Y2e!nS1z@0!0OvS><9naTsiFe zo}ak#J|V$3fy?5Ce`R*g{Mbv6;+vHZ*E?uBNA5N)8yFcqiCj;a^4NYG4g0h>@4R~` zK}PX$&7?7%m=Zf?D#iLAr?D}YntB0|hORWU(y>}4o~*uHXT>NkV7njm<;^OWGIG0s z4>`XvpqDimF`$O~OOFUUr=UL}QhlwlSeQQ@E-Nh}t5%&2&E1w_;W>xwjmb*18 zAC`1@^3x?AW_Qa`BqYLwBH-Y1%~!^QOX%qP;E+orf+LBYQKGsYn)3nKkg#b+8PY}Y z^t}*3HC(T^Z1VAM_de_)*X7q(0EoWdzk6@(Hyevds!mvkMLZs*T%vtB^J^{KzZ(Uv z0WJ00*1&o9<<8@nPN9n)7w*=8^C5Yi6@zK_d@L}ArXfYKgJU&jF@(ogWz4c31ydva z^r>d+)LTujJJEg)0Nd3<74`M}UETd%4!;P0QF9p}+<9`ua0ahC=|$HpK@R47#6y zTXbdhvQRc3FZNmAeSOUEY5i1T){?FI%rfc!3-8yB)VDjv*XJwkdI>StYKXI$nt(5~R|4w`A-;z0;2*D=lKz_G}h=z8>=vG&^nfNR@Z zs;DT-@=FG6G-4QH3x=nF$nYc9V}66lU^%;}r*2NA1E$3;;!=;IAt< zE^#G|>36oRU^pN6u%zv-sl$-tDmVOdX2wF7RCUE~+EE{F7rm%eoCTZW!-C*un0l&VtA{$5ULk2@G$fK)#uENH9TVf-`$nKjh6Qlm!8`zD`03C` z2yp#hA?PJ_nTUd3qoi>2|Bd~Ds(w)iN5T-nFs`ITOo(d^U@Xz?Mkt`)E(G<4Eih5c zTU)~-7s|LEJwOABfB${VIo`iN`*%>8bnOZ~hhc`GJf9gjF}<19W6gOLNXH;W`%W<> zvNb>h`2@n_go&XLW%wJbHWG^%5_ZN-6js26u-#&`k4^rf!QdeqD;na)0Yh8)N%p_l zdIlrjU(9HYN_>?xD}cf4j0UnQLI28s#P;aX>Ak^jp$G#>1&@r7MtO?=d}i}5n)xMr zDjb0U;wh7}<-bl(F0bgz9E#vO%Oz4{{%9Tx8#yDBwS2=5jznSo;fkOA(!wlEM4N!l z_)$Ix#sf2B;n5~)6qr;*U-4$eh-=+lX9!6`)WBuM1l@meuwDL|JzzPr*fiHEoAcZ*ToG#Df8;h)EI>*Q~~)&sv2pCQuj-X`tVY7co|AQD(AAL z*86oaX%*4zhhS6)Yt`$1H82C#Etib|BF0*t7`vDr3smZCmsO(6zDse>_x>{+hEE&I zt*5WDVW9JpB%>{_#2OKdL0X6fDvO&zeks`Rvk~xAZ~NBffAPor@Xykrn78{vPOiAk zzt~sQqo1+@*kYqvuF#&Hfqrf##ScfJmz`I$qQgHYS^%W6G4-ogj|Yc-ucZ90@ei|? 
zMekE2?|C~a;XqG_K=gA#x6g7P^4(R0Gtw^f%t0m}p+F0ffCViAWlB-3JP%18hE zn4h<5;$3UlY-N@3ELlzVU?N0z)^Tm4plXbwl-kx}3;)wbWWlA4iM1wc22VvzY0hZ6ggP0kp zYk;?o($of&MgMC5XXO{P_sJ3A!%=&>j_rT%#S8_e_=?}NJr@!bo~x=EtFR=y2O$}L zIHbp`9z1pf&c5b<1%4-I?6&UF0j-8}=YHR_0X5g+x&}X2p%1H&5ETZ2q#Q08*;2w* z?Ur6EysS#y7feYsiA};p&F@3r)~3l+k4hX&0vTE!1*rPQU;+qc`{K>{6qrW+4D=zE z2-)8YC1S~G{}d%$+<+hPga!{SSKOdMT<4AIGPqTKPof4SSBn8GBc>$+Q4Ya09r`oQ z6^=^ADI@JeF++%G2Slg(!U(B3GK3`$6Vx3ZN4iZfOu8tR)Sv-2t7=t6$m;ov1J(7dW9 z8`vnNWzZ@)qCLNd!#L8O3N};H{XxZYr2nae_6Mk^?wV-wDp&jsftg2FdlDYxO{d|7 zYAIM=O!39HUvlu^hBLo~TEvy3xRK27BIi|~rH)A35cm@AF4xfn#(+L}CSdrI6G`E- zRZig1rx3F-u+Vy<7>d0WQZq3C0@k?XPdbjyBhuRaM73?w?9ZSycG&bGLb2 z4{4n9DU<#YRbBCUIF?h*6EmvPs{i??HSntaQw`_G_g5Kt59)^uBQo) z50YXcIuz`}vEivq5ixUHTgWv#Vah&^;$9`RYHRV6@ak8n z1_n5JYziwxN#62GR^I{9$exz7j=lakj^6vJr_8;4LiX&zS3oc~;5^UY%Z=cN&U8~b zIk})j5sn zY&YXjS}%w$`$YIy_;}TMb=1cBsiOWVp;4#LvE%(;V>_jPHM#mS#q5g?zAk>M^4TRj zM^1nHUV~7?%T3}UA~vn|C;8Up?OmDAQeO-^@NKi@nv-3Y9xX4W-t%8(J3DZNw;@;zHHALT%OPrf|SRiO_mBh0#U}{ zw6yxUXYgt6I2idJhzDd@JOsX;6=tf>+M<24{(Jqx$hT(2zoZA!ke1>sT$9y$`C4PP za93xl%HXfKRrJuAvt9EZ`SMjv< zx4r%0V4}bHPBdxrzFFdGBhJ(Dbpqm-?(6~JGrn)FPy1W7c0HoEVt>A8$pJTPNOo{k z-YGin)*X*66Ikz7NQ&+*EPUK{eE2zM+!}Crv9xFC>f%B=Kw6cX{LjyR?*$#Xci)H} z1)U46%d01JHzqlkjo5oY*CjFqiLhn}gOmw8m1EjRTkg{sFFlN1qV3O{ISqd=?U28; z7NQ#4oGP)loKXT}L~5>A5i+Bu7wnAc6RB7H=;`eos)6`eyxbGZl|%QosyYpOhw5?5 z;#m=eQ~E(%l>5ZvlA_EQzg}ibl+q$s-ltxIFimyN!0$0MzacPId-h|#F0eCBB`R}N zu~>sIxxZLk)lk>Wg)w$nhI&4nIZRdUiKNKZAJdoeM6P2E+q|bp>&L;bJ1IXo=w$c2 zK#@{kDCT;R^wK8u2Yn%kC}#Cy6fCMj#k!Y}z2GGXFAYXV2kh3;Zv@2>ClzB!JtG}p zVr1E3ACLsm1gWvD!YUp_*|xb0tjn3MD=WszQABw^;iO{ThjD6pOdDxPMtE26h*AvX z`J>@+l(vP07%gA>9bu{UE+&pDh-%c_zzMBiXaQ$hLw=J=G$-OdpnIti1~CO$CRk@` zjS*vi#MT$f1r`(|wPk}@gZF;czHjuZu=3i8sU3Q4(g<<}{v~jT*8_c3fXwvy3@sS< zpOSOKdK)=@9R*DRw}|W+K%?Udg6Jtg)$)nH(CgVU$da&pZ#gkAlk4FqJ0Bow&+IUjbefM$8;cqyxewmC0>c zmBwOV!KIM~lTtac`K7)a82LgEa-xaVv!)?iEnKDOqQ||aA@8-#DAC;rQqVx_VPUi7 zFwnIQT1sRezeV~cNRf)=K=*zYd-Qb&Twde2&*07S~PX72bQ38C~+LvL2&alO-MVHpPnV;h~}a=JUUQXX8F|&#(zcI_}$HX^(HEf?kLTljnAL?5y?35mNKR_}k;s zOj1mSO{$5XSPz0Vz60ACDfPjPP4Wa-hkoq~hs|~93!mm?m2#hd2I4u|Ik14W7+@(* z?BZF&&msAn^6y>Wly<;z&He8P#{i}!cAWlOTU*7&JhPs`rww1$#h(b1gPlpwy>Aw}9JWAH<_dYd6IoAS8cE|A)$FJdWgSanA z`ppjFp4TB7tM)-+3-das15{sLtJeJ6|1bU-_GEwaA>Rj&(~I*`&fk*g&a>%ZjJ-sB2sdy46Ss*zmUSie~Tn2e`|_Z zH|jAnU{fr$tPCYQyLh}Ye&~xUtqFK+T^V$sQmi;KRd^cQ#2Faw+wUA z%!v=?vA}m?Uu*7~vBeld1K*U%{T8HMZKqPoi#KW~!iQ*#fhja}k20OwNEk7TdN(til}oFpkZe(6>t>2I(5FWl8#ycRmEu2IzEhj$p+^-b$o>vANxxCn=~L8_3Ugcz%D_L%O<8Q4t=R+ox->;3nCgOvmM#v{rB*e%e->lEp$8Jo3o_cp)YyoNfJ=r3Of)}t4gCDzow zW;T*wm~~x=VREcZs%3)Sg& z4Rj$`cB0;-tMth{HMKPGIj0GddK=t!8Rr>=2x8!IF=61h;leDQHl;D@H-)-^fAhIdX;gm&FqqIVs)T-ICcV9V z<)!|<1YUh-U~QlXX$F$<`|VEquy)N(uh-#J8ZhMZ4zHMCWc~Wz(OM|=?H}^0MhnAz z!;X{9EJveOAAg<n@k>(mJ4Pt{5DYo7vK| zB$_oF-lruqv0Jr|?(lTE-IN=b@HIQTXk9<%`ayoM(z!CMS)-o)8okm!Ki-h8;m%fB zU7nQ0N<_qK&ohyUS(PEDl%$9hRQ%L=eiJ;yiWCXzeG6nvjLJm-ME ztIdk$QwuGWCN0la>S}cnF|!JXp!wIQ%ml?(<_(rwYZa-&OcXhs29_2|FD0y>4~~j@ zPHF|5pZvwS-ahNPNdt_*aSj8$xfWpw@2lx>{AvdyzqRlz>JCkct3K7c`H9e{qkg}k z4W#oL?wxb@L!SaFtL-)Un-2qS277cXJR`VvDvjayQt1Mr&S|PD;~JUv_6JH^r*&*r zQ+0FtS)Zmn(mX_}`WCtf0D?IPM9XJkLFsi{C7A1Pb9(YuysjsuM^(Qn-hJ=`AJF!< z(M;GnEuY(yIb7RuV+JJuP! 
zQg?nuk}8y`tZfw`O*mt1zIC)pR>&!O)1?u~rJf<2t=&45*0F ztFI2D@~tyjF0lQ-6rc+dsnXlte87|weTj$l^(-RwJR1U}>yqh3^j0ghiwzSg)_zx@ z_gRQ>X$>^-F_4U)on2K`hbwr48w1T&b_fG5xZsHQ4d}DUA)sz?wDi7LOIIXSTxPJu z#0gf6^1}LW-m;_(JGy!;`Jkh^~E%m=rj|b!NAD z9!!s~GN@f%(-$!1d7nu3F=KgvSvfT^WzFpgLgjD@XiUBq(18AY2+6<{-omPzX7M^{ zs5D)_0YC5Y&{;I-E!0;_dylKR-f!?}{tPlg6^RiJV0NY9=m?=lqe=oykO;trx6b6j zcrpcxNET-wa8mV?`C|8EBga*?6vNck81#9oODQaIBg(ZAP6~sIa#>6kFt;_M^-i>S z7|I6KpX3k5R+hYB_@?>6+PplBp`U(ts2B%>9CKP$p$sd@3RIXn31clw{l$g?y(xke zoHY&98@0ZI)oQ+iMcRA@UX$&vV`@^QdI}8T$ml2X?2rXtRpe$_8h8V$JOwPu$KV8= zlfvA-@F8U8F`&f_JRp;0SS>?{Gla-%O?*Zt2JCD~OnZ+-1=~LNnYgU29By$&SdEMM zwijd-O@($}FPL<6e05##i(AS==V?F3P^>D^greFWkGkD>7A1w|ms_t^(f5lt^sK-p z+p@Ni_urG$z1$y?_oa_+j}xI4@%QSUrd39DydAifaytrr{TE8|Dy`EA(0T$5r z+nVjCw9Od}(8s@}z8g|)8Qf`Cb$tCwH2-s17m3)dT|!31_0^5KU>%!`Iho7h&Hm&B6lqgzgnTRcFV2)ZeoO@N=hofwz)(H)FFU^^Nuj z+M~07O74sG)r2^ga%ImCY8D=IyQA6a$<$BTT?b?e^_gbzqdq` zxE?RoKaBv&*iYdN_4VTHn@tWr`)M?G2EF^T(BYD?T)A0by zJqcI*)L&>FXkY(Co)!=Qh1}5ykA}mLbjP!NJ#r&2fwqk`n{T1)lm3^;|-A)>F7HQk`!?D~tnvv-xWr=#MNQFtAJc|Y0 z!?QCt4`=?>~X-c0g;RMci zV1b88y|?z_42lBF#J+-Ha)m-Z^RpE1eveLKb?~SAQJcEE(dWyF1t)4QDZ>nhm&r6X zAAuuvUcHTa_cc4*L_5fwl&yFp{95c}a!s{&^p)xK+t2;qZ0Lq!jZ$qZ=HG;Uv8!j< z%=K%(^x!}DD-a#kP1D_jdcpHi@quvx(O@1ezb+yHs{cY>r+irFt$5ckByn{71d`N! zE7ir5NErO$En=h&HJ?P-8^i`tgTs`jUz5OCHOdrPYC4ss%I{&Sjf_OM`am*HcO{yszNMU4VS=d zcwWx|UYlQ2!oKhY};7omNjZ*8rM+gR8U(_V)_ z^3%-*rHw#HhebM@#!>v?3Io;@T+$jr*E;&j;P(x(IP_ZJ;6j?HPe@s8s;z9-|A@k+ zz!UyS7+76&>ecqor+7rpBQbYoktmeY*3HC=f*6=qk$(Zc}E&JtR)yEU#N6N?X$D2GFUIo2Oqe>&Q}Z`;qbm9pSv?|KM~ zZ$1$HGmy{qUkmpPIO?{NJUy`!K}of%I79PkHmMj~&}i zrt510jzT^4zpEjmrLsgnwP}T=96nxOcAj02Q~#&3w69e&*BW@Zw%`0<{y1@Taz5N2 zx)vUz@6KDbw>eqNx@WzWbLfA#FZlXyI>Gpxhqa@C2rF@8;9)LNW!yhfb_=9wOy+oh zinG1KUfwNh;JI<&>E-apgs1B%lGmqs_uo5b_Z>n=b9@jQs zb~d~c!#xZ?HQGa*`Ka{tDKUtDXi2vF?`<%3T!hny4<4=lK{pDbue`h@uK#TrsVM!i zXGmm98H}Tx8z1`!3V9+oX=qYzr>?KBjKBgC>U|UdUQDUO`sEuS__ChjN@}SU0rD|x zZ>?W;$X@=vsB3IYh>q{?IrgHck7Qh6{kgyp3$;B=P6q|O81 zQ1$V6L`SajvaKkJ*V&^fo}@?q z5K-jD)giNMt7I(9gj0p8!<#YbN3du>iPEuDR9GbJMWT`NU>(gaya%^a8%ggI3?WK{F_AX`(s4pm% zJ>k$Tsa~TXcyACWq@u)f18!~amS&+hJm4crt!!|Fd_gvkn)N-5o6{o^YtnVA##OZa z@z?P--{%$Rt}Y?J@fnq{MsDPn>M+Q}qa53y$%t4MP{eqvt1iouv70V{OHBplLgK== zFqt#(-q3T=;sY3}Zc*|OPpViGQ-ZGJ2Q`E+Vf}&FM5A zr2^1?t%@0Ycl|0qWFKui)~39~94t9mAsK{)8$>WbgMr;I_9Umue_1|CmI3j(QQ6Iv zF7lol+b`)sC~!OGyzC zp@%ywW8d3lLJlL}-KpDz*(Sk=LLlMn&$(iL_Uu1I_c#_(`b79dn3%3sfC4 zwX!;$QgysvZTGz7I0*2$bjaO8hXXte-rA=Tx4!!Nh4s3*7umL!^%Jszl;-K5)Qh`) z^BB9IGFPxDtWru|e_xzze)u7VwclzKfk?aJZ&eFG5caKa&!NQHR`Kabq{)! 
zR!j*5&3AZR_qn$J-%ZVZ25|lLJtgk1*7f&QJVkPJ$Pa$s?&sv-Z*5K0@8sb7equ@i zIfUM8^%Z2@Z+6K4yhH7kBz9q#Ahy#qT}1VjVrQ>WegON4>PpB`rIg#LL7 zr1#_nY>n4+US)J%+>AeJKdu-Xu6(;&t7;xsu@Y^ioQ;nsR0({%?G%08701f;GDrKb zd7=J0-c-yY7@VN1v|lcD{a~?4&6=x&tnMta$tR`d_hJVhMOxrhf8fdA8oy{j{D%m6FZH^KEcOr z&O2XYG`i)+_HkEGvfXS7OX9od-FZ#>*_*$&7X1xT*wteoXfB&Sa3reT1HMYpONIh>xAI8cF5 zaZD%3$X@y4#>?*aQaGD5Sn_b%M9^NzTR3gFfRAuOIA8|YVg#L6SKH#@$0sro_x*xN zzHu^pOA}%q2L7SJof)tckphCt~uv${>9W{eQ z{FN!!h;8*);~nN^_ zld5eBrU&%h*48B%N9l^Q*$8Tb?_S5-N0qtbc`ZKPd^twH(IUdsEbQ&>Rbq}WKe6Zv zK74Ru3A?%5s|j4ET=~$b7x=Jz2yg=!%M5^JVk}Di_Qwa`CH-Zsq||yf(`h#W_K3fB zddaJF>_v#p)7^u6-@Po+AB&7(gKpI+q)BhTG&Y{g-9E(>QmB)PL!F$RoqeYqS7IcI zLLe46^c2bmW$;Pxurd_V6~O;ImaJzd78V^J(0NAReXb85?>ip_AIKjUwu}O8oLr6` zZVwMDn^Tdtw*GD!DpvB5VVasV4rrTVSv*9qp9C4pzFTykK;#{84V@~vzL$J_XS0@V z``5ar5d$5M6ABL@9ieE+p(@T4Y(!cLWnSSNWwc!g<=pP$&z0jmZ1_@#28~vo534R; zT{J)5osauJY(^ZC%W%I)5Um=!zWz?~>%cy(-aByp5Aa^TOo;F{^1B=*%6KcAeUJ-TW|mc#nR(6Adg!UGF<~mbxXI@0j!4rW#-Wok}S* zeiU38;`P4%F*eL8W}k)RitDqA^Cb852t3z*{CRY?QO4>kCl?W#L^-myayQ{C$Fx4- zi58*+a z`A#KEOo4n%+mg+p!~OxFvWjBgUWeW`Tc&EM2dw?&R4s9oc)Y9kJPbJ70ay^C6Y?3| zCC+1u^lvRlYu&nA8Lp=&3N@3-R0oJ~2&tmm?5p-Y^w*ESEiH+?{HokNtthTnI)Y={JgX-vqPc;g3 z8X_^W%?`m>TAGIZahDoe|6wC0QS;az!BRLFYOUf5Zn58#r+%5Xlop}ZkA)PHzi?CB z_JAPFT)e?F;N1BXY!s3Q@k*QvoCy8F&kCo%h=dE!%+cuJ#t|oCzu^`Rp^?IaCSuo1 zkp-o1eb*DBxtdq;=+;)wvB)?d6D5XiR5i_tu+Xs{+9z+zi{oL0L5K{=eA~(tKwSc< z+$#&mCx-XJ>??Xt7|_m_$;!CGNVI*ijgf#qR)>2zOeCRhby}t51n(N-M4K zo%=HUxt76I9~hDet3@-&x{JIjlgCYDrh6e$EtL2Pi3gltpCRe=rXZ7#%LD(@fR)YFeHpcZno`)qYiRSXFP1ciz-- zDwfo?PITS__omw)+I{QXZw?%)#KnXrqTbrW>`MV*GMS#Kv=rdibw1Wh-qwTufBqPM zbLx%|8UN2)9>0FUQJOiThj$KTO;6W zOpC!qA4l{zHr5b(#uVM;-Eua^-(DA2SMm~0!MlBsVMa9KQn2UdnP=wcDwAX8D6;XOWPE7Pv$`9B5t9Dn{@`*qnHnknL=)Z#?a5ew-KmB9zz|TQ;2&5%Gqh*hRy)J^J40`|8 z&&~-f*=vUuyRB^lWbSJd_ms zu=Vp`Y;0mjw?^M>_Tn|w__L=|3tritP1LWm4cpGn^f=X6)O7N4ayBw_|NiCn{DjwS zD`m2W8!k2B&>WsDWmm+!r5;sKy(f0~4a7FZ=BQ9uWAOcCt=|(h(XK)cDoH7uWKl>i zu`cL#aubT|+lXXFWu~5^i&^F5qb0lU1kv2qn89K~QRmVh)$V|tUG*$Vv z#?=O(7k{)d%wG6Vfc{+G6py!E=Dv8M9=iuoYsCl>mN$xXF?*iy**ch+W}I~J{a0`r z(g#@THel-_Nx0IsHA6@#5=@)g7e$9K;!klOiWf}kJ*6wlkT$Wjt*Xz(EhZilj^@_j zAhgFoHcKMCIa3Jr_JX1x|q0)8F!Tuzz)5)__zC%>i50;}8|=#cFGL>$FMcIFc>> z`s4S*PfPY1NgL)0jG;x^#jb(RXk<-5o2!-B1k=#jh|;IVcOwpEFTdXtS*=-=Org9OJVqGaxpS#;$2+#hx4WTF34tuGuow02b^3{hOSSy*&;9cS z^IlTf9$l>>@79?;hwPmHpsP-NHjTb4iss91g0s(nitJXk!S@iP#_V6&k8qj1vb>N+u@#BJv zQQ*b7w#4`QiTjA#j-70=J2$KLC4rNn#;UAlhcdXkuZ&rhLQ0(jO2rL{MLdh~bv#Bz z_bK#Dk+ZXlr}KJ(-O;wz!HGvTDYxAtDl@}gNP*zxGUf~4Tk?x)pOTfkO=@(DD_~~a zM`>Awa#igjw+KQe`EDDHZ_ib7t!zSd zAAR3X*}vP49@b1e1inUY54PPbZapphsvs%=9^aifE+by_$sMh5U{yRF>+ZSb>^yQ4 zo%%k3{UVOo z^fkwck8YZFwSjdT2_yHHOdA$AI=6@Iv$o~s`S!c%hl%UFfW!ZWCtZHfwSV*U4}9EM zoEn$7AHmQ2-M(~qDGi>hHuOC@Qn!qDU>02>zaUKT3j&GdsCsZ3%x5}~9GP%2pQyC9 zN@NL!?ReQNRgF%`c|K+8mwns(AwvlZQmBy1vQ38Eg@3N?P{SwJg*_?}`Jt>KNjDj@7IgG) zfg3?vJW$urJj*%kB9bxZZr>9!>PY-b?5bFOWqlhn(IVqg!sl7vOYuw`Od1+Y1Wc7i zift7>CP!Ze!oZ@^;gY7I*$PjD(bkz-hHYQchG7ukS6NGifr8(F<(8zRKu9{*^-uyf z)Y&X91H(rts{Bq&Nbx&8CT>s<9*im-^FkpHO+*iu`!;6TFq!;VIP!KUm4}U4rNF+GAftjGIVU^{Soe|(T|&@>NjB}$DkH2ma=m&>ePRq<@*lhb4$#W5%Y?k~~9kkckyIFboW zx^;sE@rDf~^z5d5m3-a1>-GLGJu?MSu$t7JH#5|8ti=h_eX)V z1sU;kDcujxx5^gf8@U!d#)N4Ch()@Ts0_gQKL9k3)%C%}OH6$^!i4cmm$r$+oH)5p z)rN$$1UOr;FVp6QYs}9K1eygo{~6u^Ki9_jt7D8JltMC}at3oDs@!ty0|qFx+gn#3 zZ|EtIL(BLiz(xIvnp~#UMaAmc#E*wPi&D_xZsD?{bwS^DYrij|@`GWU-`}GN|NISe zfkXFOcZ+byGr*k}$+dSl50}DGQet#Hi}OF*Cc=Bt!W?ZUlJhV<{&M`r>C8|yq*5>NjOkaF z-!9Hhr*_0f>$3zMjlKu;2y2gRZk2`}FI@AEi!bfE`K=!=KQ=54RV;XU)?W^HUTMpQ z%4uJEV1*Z0>{_}!KFDcv7=M 
zzk9}4BdNC9hQf?}&K9BlVu$>>diVCjS8Jlwzx|!&y|Aozac=IH7ur)c{aqWW7~h5w`ItOMcx|2Td)oEWEbx~F40cTC5L ziHWc7Zl=ccrcE5C>(n_mIh!#(%o(O=!^@6p3pdp#WThX3IsY>)$(pC#LRjv$Gfs!rP=eo@lu%-`#C`q@ zNm1gdN4eC6DHLfSs1Wyn{{3Kd`!KcmDzBC=Ps26)FCD%wCHTMmO$RfzNWbGz#u`ot zWxnH*R4-;nc2-VCF0db|`{G}{YXUY-bP9dXzjP!BZd;jUG->#PHPPf0i+{DNJ>JRl8(ep{5W{> zu|GkNW!z{Six@vFzmQ$a1oELggz*=oU_e5L;>;+LqjKPS>I&ELeeU+vh(?oXHc~Ip zfzAN(O&j8COey%hD@htW0hK$kP1^cCi05Y-DnB6LbtZO6kc-XQU;2|IdOab>j*4lm zl?Bgo=arGs2ZtW09^^`?h&K1TD{HCRdFkw!B4JagD30mlbyru0ur_S!xn~fp5G>bs z8wGwOUsPy`$Z%OSA$>Tgmqn6n#|q#BQ_VL~pe2jUN;t&J^7lsEl(C@bA3-!Z znL+wbI0#izq6`zICI{OdB!V?+UgVi|MX>oQKwa3UIk1QsDN%1dGbUG`4e>pdZq+@m zrwU`DR3gNfJLsu@_|}%1#iMT!oboMGl%5rzjYe9lUjh6Bu9_&xw)S2Oq^`t@ivv4f z3RmDwI-gwa9!s?Z7W{>n;qt++=^p$cLEe{5&HwrNWkR^t^QP#@@L%^?U(OXv%gcAa z+H~x+G=1U3wRJ&=tS-cue%}Vkm$knqw~Lx z`RybmdhuWX>s;)K3=ZyisGix1I6Xoj#}uH}9lmEZ$vdxCufC1>D@fy3oR~Oblc<$# zjar6m=k+!5*EL0Yhs(TgP1+B)dZasHR%asXRojBtRLcXT-|1+P=^^*9G6Y%_86Z#J z9wxx%x}lmcocTCMCnImw#-4PdVmyJF&ThL_<`-b6@RQtMTIMFIi4L^D1tEgQ)d0@` z?|ew-A_?)HXT*A(JmKy0$YXA#iIbh1Q)FcC^w!6y8~o=N`-{`&HvjzClTt{iZ@ztq@dDe} zcE|bB0l;*4c;@)Y7K|l#c^TO5das<(o~@BWo+qACS+CV5I0+<* zpq!XIeHY$Iz^e%ZQ>R`K+nPwC12(#f#vJeN=euui%qB_NfXLK1B5X{n(dr&kd|NZ6 z;+D-ZDNU3W|BS73`saSspzql>tx$qZ%`awlzg=umkYH!AEgf%c7Coq32^*d~rPZ-g zQ3ZWQ#SWr?u;{0w`t%Z5hASZHN-3DUm_GA~Xn$L5(3dYC9Ots3{N`_F((ErBHwA?r0U*UA9!SDW} zt9NoHRT4dFA8eH3i+I|6eP*cNI`e{Tz#b{^TAwq%0CO8>V|O=Sib60aU6?7$FDd6-pO;D5)w%u?Xo2zeV%u2KwizZeAK(a;A##KZ?W0k-* z(fnXxElP5-1gSJAoQ>LS_`3m39PKm*RTLuy%w$AP540M?Pm+?5h@q9kE#{KBl*w-D zL<@dY;o{YYfE;0P9`z(1=V60MG5)-*7_$;WI8h=fW9T`FbQ<1j7k(R0sierjxH5Q< zBE*rByc|{|91SDbDtyMP#9!;B_gkvCef|hjyCajIO2oR&1*27mC=@*bC-PuX$86R4 zyRLp31!B30qMBp|#$zw412(b68c#bbpdZuDPgSl>WNWsM+)cP$K0K~R20TCc>o3kU z8xZNSb9iYJwzGD=KVIO~6S6m-y*|B8V;$bZ@8ptG;bNYRN=56ysz+RbTUd84q8M>H z?{TmVjk!=RDbg9~G6qT#JfWtGr3&mot`tDJ3UxQT?;U5vSr37RuZgO$D#?rZud8Ku zUx)15nwuTS{S=s9_THbK-r96qlu#mdoy7B;LW0E6JU_)IEIUIs@2f6I4#ItZ?zB5q zbl(l-rFrkoOCYK&XxhD&Iwa`FqCVpuAcBe{pGszEhaMO4jzjRdtISH<1BFumP9OPa z{6(g=9&)6J0K-|ap_2)D!`Th|gI-k77C9jkNxUw^w&AbNl50@Mj=>XwC~juQ6VZv2 zflj2!&fl?j9gF)bx2=o&@WVc{)Wg#=w*m{<_;~#1@$aA;K@YnCGcs|>J&@!b6NiBDcvZ(gg@BL=^93zJeea8n9u&=k29f+pHxk9D}7xXyEk9Q&u7JA8Kx!RH>jk@M#~$=B$fhhEEe1qD8n~xb4_+m zb&ZtC+mfRQ6HN9=jlQvt;>VEN;mt;Ock8wFb-Bz$>b}Wvg<12>khfsWJX9m2>(`Ic z0tp;G@Uf>TlNN7KkWf)^F)7|_@cC7Z0-3%<%WNx+s3%zex%}2`H`# zLsC>w#&Cc>5JT)aU@4Rjsw&JnJC=3~*jA@O?50u!)G8qAF8fwKETT`$Y%I1mtekB0 z6D6naX4_ zbtfd+L^$|B8VWJMNI+oJKF77GuIMC=NuB8^=N35?Y;8@gjuov(706(n8 zgfmPVn`K^Cgb2(z!>5n?)|KQ)3`?b4b%=Z7&-Su06;m#s0yC*$!)>K^x@#;Hj6+uQ zD?U9b4+jiU7=qJ$7V8t`*5@0ij>%Gavi0)W9|LueZic-$qTGzhI;XORodiQR6pZaN zC(jl8Ytlr8&CqyU3bq>(AP1boY6*EyEpcj&AQbB72<#+fiZEoTIqYea;4R%81$6ZvMbm{ zq45In!y)8c(Nju?yM7a-x_YgjJ*zetWaXHFx|BYJ>1-H|Qz` zbJez4kz0*-;hl>f^#bN|Ho?9d+nm>@AKa>!Ex4WVzZ&E~xGc*d2|RJ=ryAse;Rma) z`C`ZCWuDyqyM*oZ|I7L%IlYYxTI#z0iEfO%+j-Gw{fITCD)a05^zz-}a(C#}vAgKw zcU8cAFHeK~;O^=Q0OSv>Is?yYsv2f>=EQY!ICbQcKT(3Z9suptQKm&>&vo9Bd&I;2 z9dLW?y4mmCxGXQIo9#W1D+_C1e&ye|%qwWQEpRhvGhu})2{VC&k zP#!?@kR_U!17cLMIfj=ebt|z(i-5iRwA7fX@raxH<-St*4H>f}h$unGsIhNoM;L9{ zxHu_6K1y$0qY2Y-aX8F7q0}G?kXn`qH3&9^Bvoc>fqvfwHd!zBtRhraefq2jnpsi` zh-0DDMgGZ&>^>06uAp(cicAiyffe4QlMp@DSpFvvso=Lsto=&F%@so_f?$}Dc@$+V z=+v}2WbV#l_!U1q8}~6`E=$cN0jpdCJ1>-Ngxjcf%_5Kbtp?VY^~ae!R&;uJy~c7} zYzG1{rwYNOtWDe`#F_-i0C8=5M_wJ^=#^SWwalbqLk(vyvA41FSNhDIf<1)K1~d(x zX}f#$v!^Or!2-!wT&Hu%*%JugvnwcpaD4MQM)|Q)Cqw&cr-)bb#9>1M1Mk=R>xN}M z#DX}Ol`?Hun6t=RTkT36cqdXcc@Hdq8l{Uwzm8ik2RG?(Vs9p%HeeAhSTKC3$#bJ* zGXCQx*5rh3qtAB+gD!Aa(~OMuLt^{EgKN~^<_`F$R)8JtAFKGnG*FHNJu;#dx1g^_f` 
z#F2c$260itOCiMCe>_-VKQ#fL*(06lYTTv6W@|_jKJ36Fzcxf#Z`!1&9S?|&EIuwO ztWpj156rRW4Que&<83`Q!SH4DzlWX8b^ti2W^yTyXqogclO?x3PYw;yd<3hKSx%o8 zM;4ooP7<+>nNDTds`vZ$q%LR4z=)0EY<{oaoBct=`QKD0>tFL--U?D=n&J0E)-tk+ z4I^9l%;Bw_C~4Y4UA6Vi;_m-iiN`zNBmi@{rxiwAP`PmbUptg5-`{LE0@%?l8uj>DplvGS_g!802*q*rx%^h=f0hv-Qj(-6$vEv9u^fZoiA+TX(!aynQw+) zME-S(JfEB_TYh-G3t#jkmJrTeW4jM*XE>3g9;_ z$EEsDb>s`mi~y_ZaQwl;XXlz!`_d;b1OX(Y^Wl2=cHr;%etoU-@Zysf+|y=|o*7P*C4llezpE83Uw*V{1FR`>Nih?EBPJ!EzX? zJHo|d$JQ*zd}kyVXmoL$`LkZYti!dLiiA4*y%NxA`TEsxQ!c6rlSy>eRrGei>2)U1 z!j)NOholhjH)+MM8kFtq?E93O+H-=w2=qJ(mz!u|RC>$pxAm-T`Zzi-MqJeK0m1^U z0NZD4QbzUR&Wii(xK0!u?uLsa#`VV4%)%bQTEe!5kHVPcBliXt0akcHW`?C)9WC?4DJi-LoEAXX)8o67xy!usV#!r-0Y#E2vb zrC8cJUn@=*DE!-SVV*p$n}^{{a(Zi)jD~Hq8q8o-g%6;Mn!8<)R9fV%&^l4cr*!Ps z66c)Miz~kp?t`AJsB(Qv22s~(kg{NVNetR&{-_rO`I`#-ue5k+3bR`k`@fkD*}a1suG3OlYa;RkS9EO*#q z6LC-SCC%mzqtmB%PE;=WI7(m$PE;1q*A|DTbbugTGOx={5_73B@@Gi7f3y_Fz$udG zekf{_Hf2cZVMkL>Y+GYXINs60i+F@>pI;eZ#FGhPff@`jbE#^xlb5v^y;v7x|5CAc zf4Hlp@k5hFm5s{WjW;T5USwt*x6)Zt8ID6lxKgHvCyc9x#RAskRDwov3&lc3$;d&` z2H2t$x;t>lq&UepY+nV$c!~zaw2U+ZN&Hu+DKC}MtbT}s^3NQhG>+m?V8mv6?Q#<5s$`ulXY`+7LrL)C@$kW~A z7bLoGqE~t^6>oDf^AGot*PF}{r%M9^=Do{$hNxzYlVhf%*?h#wSbMh{G?v>pC!b4S z33+C3g0655FgMF7WGd$RKg&KcwN{a96?%b`Br6X(^V73_kbRh32Jp`5b|H5UnPZ5N zq^CIjIf^*1RTqsxm~_s#5V5}bisH|0JLi5LzjX7k7jw4QaZre|9Zrc+VoqP%K-$Xm*{;wv;TC&n(_g$Sb1adKM zjf!VKFaH1!WuAXJrH?hTmN1T7MOcuM(XK@cj4|K5y}I(Y!l+d1R#jxNmU2u&eu?XN zKLXJqv(5iKgVfmF5r0pBmyqS>cTAdV)fl+Iz#kG=grmdj!)R3wmxMSZ%2UWB39$$G z)HUe}%T#svLCP#BX!@Hc`Xve4A2xnW<1=(+!ArmVB4x*>Oumlz=4VFvo8MHO6__C_ z1lrqCQZ~%(c;a7vDCpc5(@p4-skhjuDsOOB&cB6e0>s~&THd5_0yCg zf%@l+xCn#=>vcfQ(|#*#JfQAy-m{$@DyQOJ+hL$dSLQKYPNt8l-ZOR6fqe+{;TQ*q zKy>G0x5p3;wVf_dMeCYsldT#lsOLE0GS*jg5IdAA8roD!(JQc!L;faT#MxqMr><4Hv)wHB6 zN~IFHcKgQ})f=Z1yuB9s0FAt<^o@*gBsFwkZ-)dipQUhP>sNCMvQt z*vdt7LaZb-GXBXCK&FSe%r)#^U@-HbK4}`i?v*b6YH*rg$dWDi9X}6js@q1T4?Dnn zg*BhG1x-zs4F@ir1dOuG{MpJM=nqo#;eyvsmTow-+$>zP-#|qL9v}TOR$T&&Fdz;v zf)>G%9l+%wNivVz9|)BcnNnvyaX-jE^DN;1*+AG+WMY_IUcj&@H-SVefyOs9MGVr* z_pAm#{hb_&_N;I)#{WUOuP9Ue);jDeuYEbv3t3?KYXR6iTF8c(zX!ncrH<$Eq_23A zhyU?s(@&_qR`PuHDohliTfnMeCQDE36u*3ZzGDCJ)1*v<|LK$=O`l}uYI^&$c3e#X z_b+LM$6aTy-%STTxZC1qUO!w~-&^CAp(1bJJ?xZbTLDB26!2o~(2+LVM~A z^P=~>;`uwK$D|wfqVy^8^hbXdGm~=#U9xjeeu)2C`W5aUUO!##gVsH}BCzgiLI-^i zlM98Uc>$%vm%Afgy$=RT*)(zJfPAXVqq5bxSI4YWQbmu5P0~0n4qowO)z((lA|LsO z!>N$Knrn6e!zC}vj8&2~6<_P=Uew-dA}bqq=edd5TL_~O%E=Chj)MXtKb`3=^}L~{ z#Eh8R zYCzG=zjNd|n-_(0PUUmb{qEbT6ZBKrf*k;01+E}&md4p-X`BO36~JmWQ2nOEXpp`p zXXfesz2)Xhz!s7LgY-ow@uM-=E!1p7)3o1^K}0|Bf_W*~M2U{0*H@@2h<0PF+p;u1 zfav6}sjID6<8170Y;$;CK|s|smB#^v-?4=Tl2a|KpF^n5IKX6A(2Xi679DLli@D8n z=Sdz)PzETQ@+~%b$xm}OEa<}fa+H$NDnYpsS2o!;B`ty|!9dm!s^lM?V4>k&Ph{Sl zO+(%GHV?(09Bsyv=WrG5KfY_5L$=V?oF`nYJuPmmnO*USl_QZ_MZ0R6L&Id=I6Z}m z^WYn4s>_Q2J~oI$#?OzyEm?^dj6yPC(Wj|0X0i!p(qx?mcih^eE{1|{r7RLN{Z=hm z+Atkb&$2aH{XoP9Byq|hFbM0jrn-&rz@Lz}@L+RjH`Zs_3_@IW60l}{f?yLZw4b1# zFzp*$foq`OFq*jS2>#^YS=F@VzY4Q=EVZ?|)8cxl0h6=eIBD+_l^SNY)Opuqnare; zUb$}+ifNRly60;GtjF6F5u}NmT%o<0NjAE@;kyOvrK`fh#SI#ryqg-uw#jwL)7aI9 z*>f-*)0OzTY5i)+cvDr#Z5!_P*pxmFt7K6#4vLYcP7LTP^~=F&;w}aCo4)Wv28;@K zo%Ca9Go6O~ttwpzK^U2Z;J$JW$C`1S& z*~!rK(J%4=vxh=F>D>H!mD_VXlbkDbckrP1usIRA|1B)&AUynJS}1aLLwAXeME9n9 z{Oa)ZeDUUjY1i#ho|m`f3IJ^j;LCQU0v)fmI88ybK9rS3=cdNr?Y16 z@ZWU^E%GF-9akhjzqz5`N-lmSt#NW!_AGNHd!k#M;SoTumj7M3bQV^m)&$%Si$t;) z2g^W}WZ6{(QEtS#_5Jlg$JKnU!aL2#t0ynak;}y?@oLBM2N8;6{=JrzwFP|b7@c_T zXAY;?-HWR$_6pKo0WuQ`H@g6?|70-Zs&8)PpS}0RsuH^0E8=!*0;us&yd1wcHmM|+ zemo{<>3JdZZROIy6BPIUMd}nbsD*!|rGCMqm{f08^1A&XTuM$lDD_EIYs7z>t)AEt 
zOBy=@&-69_ssh~S!X62k9@jLLZKd-@fd#`Lw-cd5QI%O;@M>JHS{WILk(rsHLeQ#< zCl+9F`3wf^54tb*!$lXDURv*4s+RzwtBG8j>5bQV5SSUn= zl|22C9I^`QTU>W?-xX{Js*>Y$GRGg7fgM&ECno_g2w?0)gU!qMj>P5)9_mn_UlWTq z5sP}agzCMT8)s~FttO<|H&J3hShdD(1BcbORV+!R>xnizg%aGgHX;}pU}KZg%pr`^ zoUEh$(iI=ODdBSg?sg#V53}oT&E8;swwQzjCsPFzzxCk4@lPgo73jB%Z={S7x>YXu z&H@DkKiUT=Sp~Zr+hR-ebl1b1g%U9{jjjsJ)3Bt zg4dn}4vG0g!F==^DRY06xvqL^Ulnw;r^E`2r4`z0N+!V*3J@vlaJbdGHGDZQH_;8W zl9tj?u*qzei;}fd*>MYOh~_+@U1KGR?&qNP6#z$>?{G{mg_UKAIEogb&5>arX1kV8vK_M%T15YK)Fr>D_z}U6az}A` z0@kF`1gBjQK8DhKaXwxYOlLwjG_O$icDC1BVET&=gFgA3OAQArln5*UC@Q!HSqP${ zo~EAu6_zdMOA8BY~~_3$}0G$hMO5Ec@_y`||6fHn4>r*Xidc-!&; z0p$%Tv+kyjQZ@%37^sX5(8!>*>k3HW$T1A$1;-Gxf@!2kyrP~#$fGz|ZCRd^5pl)7 zRH38mi<1&?Oe9-leOA&^pN;7g60f#}O$gviNGGXlVwYBg>q={Cuj*+L#-duP_Ij@9- z070}SFKa95Xaewj{oe92+Sr|;vHs|bymxx|>ZNe@bKG8h@8S$dPo9Y_UmnfbV<-L# z3YE36_!$e$ANr_inE5CFWts9{83!j|qsvDoBVVis0NH_p;Q*u*x(>8r?FEL3Ggi7> zkBZICAztl`OlXn$W0-Vn&5U0iq)G}|xfj119K8gx%x4eFiuaM5yoJR~x2Jk{!Q6Vx zOOa{S!VEw`=@HVC^r?*g)22_^RWge2N!_IjQh!#A6^Teuz5JtLl`j;rk9zlZ;y_+* zc%i$`*=SZmBv8gBzY^tAgKp~sjzZ~vRyl4!&5hLMX<22V9+z0fXt$ZY-5KO33N_Pgh5Xywm;MVt>u%U9MpCr8{t z+a2}!Eyp^33I%cjSGJSGFD;8nb#3JS`|DZcf1L8$%4WvGF8v~)G;`E+mQuj3z-;lC z(&};c;@do(36i>w?Xy5&g%Gld&phh1xl_=670yI!9dYJIPHP<-f5H6wQ;3s{kN`L4 zhI5Fy0#0u9yWbIIma>cu4Q}QQ=v>Uq%q(>dl9Bgq>gmx?kPANi%yS3e9WQ&rLp+c{ z{-LcvY%5ME{AjFw=~JYzGg>AzEZ4)~Rawq(X!G^qxFP6+fo39B!GHl&lNBd;r_?7| zDfY8QhN<;z05WTMYHhsg!s>3+>8+j~6`_Tsc6n@ly@O*-7MeQFHc8h_h42w1nkVg@ zNyHKb1V?tBm+TUD^2=A%TV+z zSvvWb%x5r%)~qjvP?%;SZ7p^1{N*Ymd$kqbG*uf-g!9SEoN%DgXFyuW09Gnhk@7O( zK$QQj4htK*vitA#4E|2SdGnoL5X=86ZH?UMXZKFohKYSX6p%G*nSXELuYe)Zq<^|! zXwuu|SBAW#7yQcioGftzB*mo1nD(ZEhn+3-K-s1=6+@qg>rL#LyqK9ttNz{isaiA$+kdS~i*0a%h44=0f}&p>D0R-8(OCpv`% zf|C~e{fUiTYXI6WRl`Uwnka_fz$~X=0bmbz@WcPSRL`(~TF_`>A25}bp_$nKeJa%V zY-GYC@_vRUe(*vzM6<_)>|9NmP`N8(kvb(Qjz#J7XC?kLjV6$Q7~3(U%4w^bf1fOK zSG$@PgjGu?ooh+L*jOaaq$p74@xtXwh@IP@w4@Z>d+`ya3D*~rZ*fDJ*F63fo{SSD z!ueoEDw@3i@gBu7Q4ezyC-Bf>Q$Unq7<`x!oSrvZSHJ0^$ly2zmj^2W8~@l-TC&;r zF{l-j@KZchmyi_(8J9W00b>&;@JN-Q!2Ds0XDdEZOuCNQ7!8=ia!0_X(ET@HZa=>r z)QSE20<94n)X*x%>8g=qiBKCP(@#a2f3F79HM&K=e5zxdyQBHBvT&?!e_#Lz^nSTm zx;zwX>^>YqSd1Rt9rRvhGv9>Q9!^^@Tm71M-^??~;0#fuK#LB5O;AH!i0Y%m`qLGS z8k37L$z?qY*^`bAzfBD0+|zOl=w-ntEw=$nDKb;-HINS#KP)KrJXHVgZW2?Fx(C>{nK30 z=(c;khxmtky`ER6LHFZ&5nF>jhhe=Zz)Q|{w9CK? 