diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c28a982 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# git rm -r --cached . +# git add . +# git commit -m 'update .gitignore' + +*.iml +.gradle +/local.properties +/.idea +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties +/cmake-build-debug diff --git a/3rdparty/TNN/.clang-format b/3rdparty/TNN/.clang-format new file mode 100755 index 0000000..7850633 --- /dev/null +++ b/3rdparty/TNN/.clang-format @@ -0,0 +1,114 @@ +--- +# 语言: None Cpp Java ObjC Protp +Language: Cpp +#LLVM Google +BasedOnStyle: Google +# 语言: None Cpp Java ObjC Protp +# 访问说明符的偏移(public private) +AccessModifierOffset: -4 +# 括号之后,水平对齐参数: Align DontAlign AlwaysBreak +AlignAfterOpenBracket: Align +# 连续的宏 +# AlignConsecutiveMacros: true +# 连续的赋值时,对齐所有的等号 +AlignConsecutiveAssignments: true +# 左对齐换行(使用反斜杠换行)的反斜杠 +AlignEscapedNewlines: Right +# # 左对齐换行(使用反斜杠换行)的反斜杠 +# AlignEscapedNewlinesLeft: true +# 水平对齐二元和三元表达式的操作数 +AlignOperands: true +# 允许函数声明的所有参数在放在下一行 +AllowAllParametersOfDeclarationOnNextLine: false +# AllowAllArgumentsOnNextLine: false +# 允许短的块放在同一行 +AllowShortBlocksOnASingleLine : false +# 允许短的case标签放在同一行 +AllowShortCaseLabelsOnASingleLine: false +# 允许短的函数放在同一行: None, InlineOnly(定义在类中), Empty(空函数), Inline(定义在类中,空函数), All +AllowShortFunctionsOnASingleLine: Empty +# 是否允许短if单行 If true, if (a) return; 可以放到同一行 +AllowShortIfStatementsOnASingleLine: false +# 允许短的循环保持在同一行 +AllowShortLoopsOnASingleLine: false +# 总是在定义返回类型后换行(deprecated) +AlwaysBreakAfterDefinitionReturnType: None +# 每行字符的限制,0表示没有限制 +ColumnLimit: 120 +# 描述具有特殊意义的注释的正则表达式,它不应该被分割为多行或以其它方式改变 +CommentPragmas: '^ IWYU pragma:' +#指针的*的挨着哪边 +PointerAlignment: Right +#缩进宽度 +IndentWidth: 4 +# OC block后面的缩进 +ObjCBlockIndentWidth: 4 +#tab键盘的宽度 +TabWidth: 4 +Standard: Cpp11 +UseTab: Never +CompactNamespaces: false +# 
命名空间的偏移 +NamespaceIndentation: Inner +# 命名空间的末尾注释 +FixNamespaceComments: true +# IndentPPDirectives: BeforeHash +--- +# 语言: None Cpp Java ObjC Protp +Language: ObjC +#LLVM Google +BasedOnStyle: LLVM +# 访问说明符的偏移(public private) +AccessModifierOffset: -4 +# 括号之后,水平对齐参数: Align DontAlign AlwaysBreak +AlignAfterOpenBracket: Align +# 连续的宏 +# AlignConsecutiveMacros: true +# 连续的赋值时,对齐所有的等号 +AlignConsecutiveAssignments: true +# 左对齐换行(使用反斜杠换行)的反斜杠 +AlignEscapedNewlines: Right +# # 左对齐换行(使用反斜杠换行)的反斜杠 +# AlignEscapedNewlinesLeft: true +# 水平对齐二元和三元表达式的操作数 +AlignOperands: true +# 允许函数声明的所有参数在放在下一行 +AllowAllParametersOfDeclarationOnNextLine: false +# AllowAllArgumentsOnNextLine: false +# 允许短的块放在同一行 +AllowShortBlocksOnASingleLine : false +# 允许短的case标签放在同一行 +AllowShortCaseLabelsOnASingleLine: false +# 允许短的函数放在同一行: None, InlineOnly(定义在类中), Empty(空函数), Inline(定义在类中,空函数), All +AllowShortFunctionsOnASingleLine: Empty +# 是否允许短if单行 If true, if (a) return; 可以放到同一行 +AllowShortIfStatementsOnASingleLine: false +# 允许短的循环保持在同一行 +AllowShortLoopsOnASingleLine: false +# 总是在定义返回类型后换行(deprecated) +AlwaysBreakAfterDefinitionReturnType: None +# 每行字符的限制,0表示没有限制 +ColumnLimit: 120 +# 描述具有特殊意义的注释的正则表达式,它不应该被分割为多行或以其它方式改变 +CommentPragmas: '^ IWYU pragma:' +#指针的*的挨着哪边 +PointerAlignment: Right +#缩进宽度 +IndentWidth: 4 +# OC block后面的缩进 +ObjCBlockIndentWidth: 4 +#tab键盘的宽度 +TabWidth: 4 +Standard: Cpp11 +UseTab: Never +CompactNamespaces: false +# 命名空间的偏移 +NamespaceIndentation: Inner +# 命名空间的末尾注释 +FixNamespaceComments: true +# IndentPPDirectives: BeforeHash +--- +Language: Proto +#.proto文件不格式化 +DisableFormat: true +... 
diff --git a/3rdparty/TNN/.github/ISSUE_TEMPLATE/feature_request.md b/3rdparty/TNN/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/3rdparty/TNN/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/3rdparty/TNN/.github/ISSUE_TEMPLATE/model-converter-issue.md b/3rdparty/TNN/.github/ISSUE_TEMPLATE/model-converter-issue.md new file mode 100644 index 0000000..6ad2d0c --- /dev/null +++ b/3rdparty/TNN/.github/ISSUE_TEMPLATE/model-converter-issue.md @@ -0,0 +1,31 @@ +--- +name: model converter issue +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- + +**1. 使用环境(environment)** + - OS: Mac/Ubuntu/Centos/Docker + - OS Version: + + **2. Github版本** + - branch: + - commit(optional): + + **3. 详细描述bug 情况 (Describe the bug)** + A clear and concise description of what the bug is. + - issue type: 编译问题(Build failed)/ 模型转换失败(converter failed)/ 模型不对齐(model misalignment) + - original model: Caffe/ONNX/TensorFlow/TensorFlowLite + (如果可以的话,请上传原始的模型文件) + + **4. 日志(Log)** + ```txt + 将日志粘贴在这里 + Paste log here or pastebin + ``` + + **5. 截图(Screenshots)** + If applicable, add screenshots to help explain your problem. 
diff --git a/3rdparty/TNN/.github/ISSUE_TEMPLATE/tnn-inference-issue.md b/3rdparty/TNN/.github/ISSUE_TEMPLATE/tnn-inference-issue.md new file mode 100644 index 0000000..8df4d15 --- /dev/null +++ b/3rdparty/TNN/.github/ISSUE_TEMPLATE/tnn-inference-issue.md @@ -0,0 +1,41 @@ +--- +name: tnn inference issue +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- + + **1. 环境(environment)** + - Build OS and Version: Mac/Ubuntu/Centos/Windows + - RunTime OS Version: Linux/Android/IOS + - RunTime DEVICE: ARM/OPENCL/METAL + + **2. Github版本** + - branch: + - commit(optional): + + **3. 编译方式(compile method)** + CMake完整编译参数(full cmake arguments) + + **4. 编译日志(build log)** + ```txt + 将日志粘贴在这里 + Paste log here or pastebin + ``` + + **5. 详细描述bug 情况 (Describe the bug)** + + + **6. 运行日志(runtime log)** + ```txt + 将日志粘贴在这里 + Paste log here or pastebin + ``` + + **7. 截图(Screenshots)** + ```txt + 将截图粘贴在这里 + Paste screenshorts here or pastebin + ``` diff --git a/3rdparty/TNN/.github/release-drafter.yml b/3rdparty/TNN/.github/release-drafter.yml new file mode 100644 index 0000000..b05f832 --- /dev/null +++ b/3rdparty/TNN/.github/release-drafter.yml @@ -0,0 +1,29 @@ +name-template: 'TNN v$RESOLVED_VERSION' +tag-template: 'v$RESOLVED_VERSION' +categories: + - title: '🚀 Features' + labels: + - 'enhancement' + - title: '🐛 Bug Fixes' + labels: + - 'bug' + - title: '🧰 Maintenance' + labels: + - 'documentation' +change-template: '- $TITLE @$AUTHOR (#$NUMBER)' +change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 
+version-resolver: + major: + labels: + - 'major' + minor: + labels: + - 'minor' + patch: + labels: + - 'patch' + default: patch +template: | + ## Changes + + $CHANGES \ No newline at end of file diff --git a/3rdparty/TNN/.github/workflows/android-arm-cpu.yml b/3rdparty/TNN/.github/workflows/android-arm-cpu.yml new file mode 100644 index 0000000..f49b5c0 --- /dev/null +++ b/3rdparty/TNN/.github/workflows/android-arm-cpu.yml @@ -0,0 +1,32 @@ +name: android-arm-cpu +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + CONDITION: ${{ steps.preflight.outputs.CONDITION }} + steps: + - uses: actions/checkout@v2 + - name: Preflight + id: preflight + run: | + echo ::set-output name=CONDITION::0 + ./scripts/.ci/preflight.sh android || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + + android: + needs: [setup] + if: ${{ needs.setup.outputs.CONDITION != '11' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: configure + run: sudo apt-get install attr + - name: build + run: export ANDROID_NDK=$ANDROID_HOME/ndk-bundle && ./scripts/build_android.sh \ No newline at end of file diff --git a/3rdparty/TNN/.github/workflows/ios-cpu.yml b/3rdparty/TNN/.github/workflows/ios-cpu.yml new file mode 100644 index 0000000..ff47973 --- /dev/null +++ b/3rdparty/TNN/.github/workflows/ios-cpu.yml @@ -0,0 +1,30 @@ +name: ios-cpu +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + CONDITION: ${{ steps.preflight.outputs.CONDITION }} + steps: + - uses: actions/checkout@v2 + - name: Preflight + id: preflight + run: | + echo ::set-output name=CONDITION::0 + ./scripts/.ci/preflight.sh ios || ret=$? 
&& echo $ret && echo ::set-output name=CONDITION::$ret + + ios-iphone-os: + needs: [setup] + if: ${{ needs.setup.outputs.CONDITION != '11' }} + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: build + run: ./scripts/build_ios.sh diff --git a/3rdparty/TNN/.github/workflows/linux-x86-cpu-gcc.yml b/3rdparty/TNN/.github/workflows/linux-x86-cpu-gcc.yml new file mode 100644 index 0000000..3195d75 --- /dev/null +++ b/3rdparty/TNN/.github/workflows/linux-x86-cpu-gcc.yml @@ -0,0 +1,34 @@ +name: linux-x86-cpu-gcc +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + CONDITION: ${{ steps.preflight.outputs.CONDITION }} + steps: + - uses: actions/checkout@v2 + - name: Preflight + id: preflight + run: | + echo ::set-output name=CONDITION::0 + ./scripts/.ci/preflight.sh x86 || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + + linux-gcc: + needs: [setup] + if: ${{ needs.setup.outputs.CONDITION != '11' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: update + run: sudo apt-get update + - name: gcc-multilib + run: sudo apt-get install gcc-multilib g++-multilib libprotobuf-dev protobuf-compiler + - name: build + run: ./scripts/build_x86_linux.sh diff --git a/3rdparty/TNN/.github/workflows/macos-x64-cpu.yml b/3rdparty/TNN/.github/workflows/macos-x64-cpu.yml new file mode 100644 index 0000000..e93862e --- /dev/null +++ b/3rdparty/TNN/.github/workflows/macos-x64-cpu.yml @@ -0,0 +1,32 @@ +name: macos-x64-cpu +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + CONDITION: ${{ steps.preflight.outputs.CONDITION }} + steps: + - uses: actions/checkout@v2 + - name: Preflight + id: preflight + run: | + echo ::set-output name=CONDITION::0 + ./scripts/.ci/preflight.sh x86 || ret=$? 
&& echo $ret && echo ::set-output name=CONDITION::$ret + + macos-clang: + needs: [setup] + if: ${{ needs.setup.outputs.CONDITION != '11' }} + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - name: protobuf + run: brew install protobuf opencv3 + - name: build + run: ./scripts/build_macos.sh diff --git a/3rdparty/TNN/.github/workflows/release-drafter.yml b/3rdparty/TNN/.github/workflows/release-drafter.yml new file mode 100644 index 0000000..c77b1be --- /dev/null +++ b/3rdparty/TNN/.github/workflows/release-drafter.yml @@ -0,0 +1,16 @@ +name: Release Drafter + +on: + push: + branches: + - master + +jobs: + update_release_draft: + runs-on: ubuntu-latest + steps: + - uses: release-drafter/release-drafter@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + config-name: release-drafter.yml diff --git a/3rdparty/TNN/.github/workflows/release.yml b/3rdparty/TNN/.github/workflows/release.yml new file mode 100644 index 0000000..1889c2f --- /dev/null +++ b/3rdparty/TNN/.github/workflows/release.yml @@ -0,0 +1,617 @@ +name: Release + +on: + push: + tags: + - '*' + +jobs: + setup: + runs-on: ubuntu-latest + outputs: + VERSION: ${{ steps.get_version.outputs.VERSION }} + steps: + - name: Get-version + id: get_version + run: | + echo "github ref:" ${GITHUB_REF} + echo "tag version:" ${GITHUB_REF/refs\/tags\//} + echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} + + full-source: + needs: [setup] + runs-on: ubuntu-latest + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-full-source + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Compress + env: + ASSET_PATH: /tmp/${{ env.ASSET_NAME }}.zip + run: | + echo "compress to" ${ASSET_PATH} + rm -rf .git + rm -f ${ASSET_PATH} + zip -9r ${ASSET_PATH} . 
+ - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: /tmp/${{ env.ASSET_NAME }}.zip + if-no-files-found: error + + android: + needs: [setup] + runs-on: ubuntu-latest + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-android + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Configure + run: sudo apt-get install attr + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.zip + run: | + cd ./scripts + export ANDROID_NDK=$ANDROID_HOME/ndk-bundle && ./build_android.sh + cd ./release + zip -9r ${ASSET_PATH} . + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/release/${{ env.ASSET_NAME }}.zip + if-no-files-found: error + + ios: + needs: [setup] + runs-on: macos-latest + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ios + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + run: | + ./scripts/build_ios.sh + cd ./platforms/ios + zip -9r ${{ env.ASSET_NAME }}.zip ./tnn.bundle ./tnn.framework + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./platforms/ios/${{ env.ASSET_NAME }}.zip + if-no-files-found: error + + centos7-x86: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: neiltian/tnn-cuda-build-env:centos7-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-centos7-x86 + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + export PATH=$PATH:/usr/local/cmake-3.18.4-Linux-x86_64/bin/ + cd ./scripts + ./build_x86_linux.sh + cd ./x86_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/x86_linux_release/${{ env.ASSET_NAME 
}}.tar.gz + if-no-files-found: error + + centos7-cuda: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: neiltian/tnn-cuda-build-env:centos7-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-centos7-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + export PATH=$PATH:/usr/local/cmake-3.18.4-Linux-x86_64/bin/ + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_cuda_linux.sh + cd cuda_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/cuda_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + centos7-x86-cuda: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: neiltian/tnn-cuda-build-env:centos7-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-centos7-x86-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + export PATH=$PATH:/usr/local/cmake-3.18.4-Linux-x86_64/bin/ + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_linux.sh + cd linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + centos8-x86: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: neiltian/tnn-cuda-build-env:centos8-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-centos8-x86 + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + 
ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + export PATH=$PATH:/usr/local/cmake-3.18.4-Linux-x86_64/bin/ + cd ./scripts + ./build_x86_linux.sh + cd ./x86_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/x86_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1604-x86: + needs: [setup] + runs-on: ubuntu-16.04 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-16.04-x86 + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Configure + run: sudo apt-get install attr + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + cd ./scripts + ./build_x86_linux.sh + cd ./x86_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/x86_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1604-cuda: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: neiltian/tnn-cuda-build-env:ubuntu-16.04-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-16.04-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + export PATH=$PATH:/usr/local/cmake-3.15.3/bin/ + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_cuda_linux.sh + cd cuda_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/cuda_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1604-x86-cuda: + needs: [setup] + runs-on: ubuntu-16.04 + container: + image: 
neiltian/tnn-cuda-build-env:ubuntu-16.04-cuda10.2-cudnn8-trt7.1 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-16.04-x86-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + apt-get install wget + export PATH=$PATH:/usr/local/cmake-3.15.3/bin/ + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_linux.sh + cd linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1804-x86: + needs: [setup] + runs-on: ubuntu-18.04 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-18.04-x86 + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Configure + run: sudo apt-get install attr + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + cd ./scripts + ./build_x86_linux.sh + cd ./x86_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/x86_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1804-cuda: + needs: [setup] + runs-on: ubuntu-18.04 + container: + image: nvcr.io/nvidia/tensorrt:20.03-py3 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-18.04-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + mkdir /usr/local/cudnn + mkdir /usr/local/tensorrt + ln -s /usr/include/ /usr/local/cudnn/include + ln -s /usr/include/ /usr/local/tensorrt/include + ln -s /usr/lib/x86_64-linux-gnu/ /usr/local/cudnn/lib64 + ln -s /usr/lib/x86_64-linux-gnu/ 
/usr/local/tensorrt/lib + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_cuda_linux.sh + cd cuda_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/cuda_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-1804-x86-cuda: + needs: [setup] + runs-on: ubuntu-18.04 + container: + image: nvcr.io/nvidia/tensorrt:20.03-py3 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-18.04-x86-cuda + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + mkdir /usr/local/cudnn + mkdir /usr/local/tensorrt + ln -s /usr/include/ /usr/local/cudnn/include + ln -s /usr/include/ /usr/local/tensorrt/include + ln -s /usr/lib/x86_64-linux-gnu/ /usr/local/cudnn/lib64 + ln -s /usr/lib/x86_64-linux-gnu/ /usr/local/tensorrt/lib + export TENSORRT_ROOT_DIR=/usr/local/tensorrt/ + export CUDNN_ROOT_DIR=/usr/local/cudnn/ + cd ./scripts + ./build_linux.sh + cd linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + ubuntu-2004-x86: + needs: [setup] + runs-on: ubuntu-20.04 + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-ubuntu-20.04-x86 + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Configure + run: sudo apt-get install attr + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.tar.gz + run: | + cd ./scripts + ./build_x86_linux.sh + cd ./x86_linux_release + tar -zcvf ${ASSET_PATH} lib include bin + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: 
./scripts/x86_linux_release/${{ env.ASSET_NAME }}.tar.gz + if-no-files-found: error + + macos: + needs: [setup] + runs-on: macos-latest + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-macos + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - name: Build + env: + ASSET_PATH: ${{ env.ASSET_NAME }}.zip + run: | + cd ./scripts + ./build_macos.sh + cd ./macos_release + zip -9r ${ASSET_PATH} . + - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: ./scripts/macos_release/${{ env.ASSET_NAME }}.zip + if-no-files-found: error + + windows: + needs: [setup] + runs-on: windows-latest + env: + ASSET_NAME: tnn-${{ needs.setup.outputs.VERSION }}-windows + outputs: + ASSET_NAME: ${{ env.ASSET_NAME }} + steps: + - uses: actions/checkout@v2 + - uses: ilammy/msvc-dev-cmd@v1 + - uses: seanmiddleditch/gha-setup-ninja@master + - name: Build + run: | + cd .\scripts + .\build_msvc.bat + cd .\msvc_release + 7z a -r ${{ env.ASSET_NAME }}.zip . 
+ - name: Upload_asset + uses: actions/upload-artifact@v2 + with: + name: ${{ env.ASSET_NAME }} + path: .\scripts\msvc_release\${{ env.ASSET_NAME }}.zip + if-no-files-found: error + + release: + needs: [setup, full-source, android, ios, centos7-x86, centos7-cuda, centos7-x86-cuda, centos8-x86, ubuntu-1604-x86, ubuntu-1604-cuda, ubuntu-1604-x86-cuda, ubuntu-1804-x86, ubuntu-1804-cuda, ubuntu-1804-x86-cuda, ubuntu-2004-x86, macos, windows] + runs-on: ubuntu-latest + env: + ARTIFACTS_PATH: artifacts + steps: + - name: Download + id: download_artifacts + uses: actions/download-artifact@v2 + with: + path: ${{ env.ARTIFACTS_PATH }} + - name: Create-release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ needs.setup.outputs.VERSION }} + release_name: TNN ${{ needs.setup.outputs.VERSION }} + draft: true + prerelease: false + - name: Upload-full-source + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.full-source.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.zip + asset_name: ${{ env.ASSET_NAME }}.zip + asset_content_type: application/zip + - name: Upload-android + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.android.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.zip + asset_name: ${{ env.ASSET_NAME }}.zip + asset_content_type: application/zip + - name: Upload-ios + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ios.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH 
}}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.zip + asset_name: ${{ env.ASSET_NAME }}.zip + asset_content_type: application/zip + - name: Upload-centos7-x86 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.centos7-x86.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-centos8-x86 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.centos8-x86.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-16.04-x86 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-1604-x86.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-16.04-cuda + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-1604-cuda.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-16.04-x86-cuda + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ 
needs.ubuntu-1604-x86-cuda.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-18.04-x86 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-1804-x86.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-18.04-cuda + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-1804-cuda.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-18.04-x86-cuda + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-1804-x86-cuda.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + asset_content_type: application/tar+gzip + - name: Upload-ubuntu-20.04-x86 + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.ubuntu-2004-x86.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.tar.gz + asset_name: ${{ env.ASSET_NAME }}.tar.gz + 
asset_content_type: application/tar+gzip + - name: Upload-macos + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.macos.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.zip + asset_name: ${{ env.ASSET_NAME }}.zip + asset_content_type: application/zip + - name: Upload-windows + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ASSET_NAME: ${{ needs.windows.outputs.ASSET_NAME }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ env.ARTIFACTS_PATH }}/${{ env.ASSET_NAME }}/${{ env.ASSET_NAME }}.zip + asset_name: ${{ env.ASSET_NAME }}.zip + asset_content_type: application/zip diff --git a/3rdparty/TNN/.gitignore b/3rdparty/TNN/.gitignore new file mode 100644 index 0000000..21221bf --- /dev/null +++ b/3rdparty/TNN/.gitignore @@ -0,0 +1,498 @@ +.DS_Store +.vscode +build +build32 +build64 +release +tags +.idea/ +tools/onnx2tnn/onnx-converter/3rdparty/ +GPATH +GRTAGS +GTAGS +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Global/JetBrains.gitignore + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with 
auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Global/Emacs.gitignore + +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ +dist/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Global/Vim.gitignore + +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + + +### 
https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Global/Linux.gitignore + +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Global/macOS.gitignore + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/CMake.gitignore + +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/C++.gitignore + +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ 
+sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + + +### https://raw.github.com/github/gitignore/eee21bf0c397cddc39ff1c94615d135e0ad36f8c/Android.gitignore + +# Built application files +*.aar +*.ap_ +*.aab + +# Files for the ART/Dalvik VM +*.dex + +# Java class files +*.class + +# Generated files +bin/ +gen/ +out/ +# Uncomment the following line in case you need and you don't have the release build type files in your app +# release/ + +# Gradle files +.gradle/ +build/ + +# Local configuration file (sdk path, etc) +local.properties + +# Proguard folder generated by Eclipse +proguard/ + +# Log Files +*.log + +# Android Studio Navigation editor temp files +.navigation/ + +# Android Studio captures folder +captures/ + +# IntelliJ +*.iml +.idea/workspace.xml +.idea/tasks.xml +.idea/gradle.xml +.idea/assetWizardSettings.xml +.idea/dictionaries +.idea/libraries +# Android Studio 3 in .gitignore file. +.idea/caches +.idea/modules.xml +# Comment next line if keeping position of elements in Navigation Editor is relevant for you +.idea/navEditor.xml + +# Keystore files +# Uncomment the following lines if you do not want to check your keystore files in. +#*.jks +#*.keystore + +# External native build folder generated in Android Studio 2.2 and later +.externalNativeBuild +.cxx/ + +# Google Services (e.g. 
APIs or Firebase) +# google-services.json + +# Freeline +freeline.py +freeline/ +freeline_project_description.json + +# fastlane +fastlane/report.xml +fastlane/Preview.html +fastlane/screenshots +fastlane/test_output +fastlane/readme.md + +# Version control +vcs.xml + +# lint +lint/intermediates/ +lint/generated/ +lint/outputs/ +lint/tmp/ +# lint/reports/ + +source/tnn/network/tensorrt/thirdparty/TensorRT* +model/ +# opencl generated code +opencl_program.cc + +# opencl generated code +opencl_program.cc diff --git a/3rdparty/TNN/.travis.yml b/3rdparty/TNN/.travis.yml new file mode 100644 index 0000000..db6bc36 --- /dev/null +++ b/3rdparty/TNN/.travis.yml @@ -0,0 +1,73 @@ +sudo: false + +git: + depth: 3 + quiet: true + +addons: + apt: + package: + - lcov + +matrix: + include: + - name: "Linux | Arm64 | build" + os: linux + arch: arm64 + before_install: + - ./scripts/.ci/preflight.sh arm || travis_terminate 0 + script: + - ./scripts/build_aarch64_linux.sh + + - name: "Linux | Arm32 | build" + os: linux + arch: arm64 + before_install: + - ./scripts/.ci/preflight.sh arm || travis_terminate 0 + before_script: + - sudo dpkg --add-architecture armhf + - sudo apt-get update + - sudo apt-get -y install crossbuild-essential-armhf libc6:armhf libstdc++-5-dev:armhf linux-libc-dev:armhf + script: + - ./scripts/build_armhf_linux.sh + + - name: "Linux | Arm64 | test" + os: linux + compiler: clang + arch: arm64 + before_install: + - ./scripts/.ci/preflight.sh arm || travis_terminate 0 + script: + - travis_wait 40 ./scripts/build_test.sh + + - name: "Windows | x64 | build" + os: windows + language: cpp + before_install: + - ./scripts/.ci/preflight.sh x86 || travis_terminate 0 + install: + - PowerShell -Command 'Set-ExecutionPolicy -ExecutionPolicy RemoteSigned' + - choco install ninja + script: + - scripts/build_msvc_native.bat x64 ci + env: + - CXX=cl.exe + - CXX_FOR_BUILD=cl.exe + - CC=cl.exe + - CC_FOR_BUILD=cl.exe + + - name: "Windows | x86 | build" + os: windows + language: cpp 
+ before_install: + - ./scripts/.ci/preflight.sh x86 || travis_terminate 0 + install: + - PowerShell -Command 'Set-ExecutionPolicy -ExecutionPolicy RemoteSigned' + - choco install ninja + script: + - scripts/build_msvc_native.bat x86 ci + env: + - CXX=cl.exe + - CXX_FOR_BUILD=cl.exe + - CC=cl.exe + - CC_FOR_BUILD=cl.exe diff --git a/3rdparty/TNN/CMakeLists.txt b/3rdparty/TNN/CMakeLists.txt new file mode 100644 index 0000000..684fa8f --- /dev/null +++ b/3rdparty/TNN/CMakeLists.txt @@ -0,0 +1,390 @@ +cmake_minimum_required(VERSION 3.1) + +project(TNN) + +ENABLE_LANGUAGE(ASM) + +set(TNN_MAJOR_VERSION 0) +set(TNN_MINOR_VERSION 1) +set(TNN_PATCH_VERSION 0) +set(TNN_BUILD_VERSION 0) +set(TNN_VERSION "${TNN_MAJOR_VERSION}.${TNN_MINOR_VERSION}.${TNN_PATCH_VERSION}.${TNN_BUILD_VERSION}") + +option(TNN_CPU_ENABLE "Enable Cpu" ON) +option(TNN_X86_ENABLE "Enable X86" OFF) +option(TNN_ARM_ENABLE "Enable Arm" OFF) +option(TNN_ARM82_ENABLE "Enable Arm82" OFF) +option(TNN_METAL_ENABLE "Enable Metal" OFF) +option(TNN_OPENCL_ENABLE "Enable OpenCL" OFF) +option(TNN_CUDA_ENABLE "Enable CUDA" OFF) +option(TNN_DSP_ENABLE "Enable DSP" OFF) +option(TNN_ATLAS_ENABLE "Enable Atlas" OFF) +option(TNN_TENSORRT_ENABLE "Enable TensorRT" OFF) +option(TNN_OPENVINO_ENABLE "Enable OPENVINO" OFF) +option(TNN_NPU_ENABLE "Enable NPU" OFF) +option(TNN_HUAWEI_NPU_ENABLE "Enable NPU" OFF) +option(TNN_RK_NPU_ENABLE "Enable RKNPU" OFF) +option(TNN_SYMBOL_HIDE "Enable Hide Symbol Visibility" ON) +option(TNN_OPENMP_ENABLE "Enable OpenMP" OFF) +option(TNN_BUILD_SHARED "Build Shared Library" ON) +option(TNN_OPENVINO_BUILD_SHARED "Build Shared Openvino Library" OFF) +option(TNN_TEST_ENABLE "Enable Test" OFF) +option(TNN_UNIT_TEST_ENABLE "Enable Test" OFF) +option(TNN_PROFILER_ENABLE "Enable Test" OFF) +option(TNN_QUANTIZATION_ENABLE "Enable Test" OFF) +option(TNN_MODEL_CHECK_ENABLE "Enable Test" OFF) +option(TNN_BENCHMARK_MODE "Enable Benchmark" OFF) +option(TNN_UNIT_TEST_BENCHMARK "Enable Benchmark Layer" OFF) 
+option(TNN_CONVERTER_ENABLE "Enable Model Converter" OFF) +option(TNN_ONNX2TNN_ENABLE "Enable ONNX2TNN Converter" OFF) +option(TNN_TNN2MEM_ENABLE "Enable tnn2mem" OFF) +option(TNN_BUILD_BENCHMARK_TEST_LIB_ENABLE "Enable Build Benchmark Test Lib" OFF) +option(TNN_GLIBCXX_USE_CXX11_ABI_ENABLE "Enable Use CXX11 ABI" ON) + +set(TNN_USE_GFLAGS OFF) + +message(${CMAKE_SOURCE_DIR}) +message(${CMAKE_CURRENT_SOURCE_DIR}) + +include(cmake/macros.cmake) + +if (SYSTEM.Windows) + add_definitions(-DBUILDING_DLL) +endif() + +if(TNN_PROFILER_ENABLE) + add_definitions(-DTNN_PROFILE) + set(TNN_SYMBOL_HIDE OFF) +endif() + +if(TNN_BENCHMARK_MODE) + add_definitions(-DGENERATE_RESOURCE) +endif() + +if(MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4003 /wd4819 /wd4244 /wd4018 /utf-8") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4003 /wd4819 /wd4244 /wd4018 /utf-8") +endif() + +# ignore loop-vectorize warning +if(SYSTEM.Windows) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-pass-failed") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pass-failed") +endif() + +# ignore deprecated warning +if(SYSTEM.Windows) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes") +endif() + +if(DEBUG) + set(TNN_SYMBOL_HIDE OFF) + add_definitions(-DDEBUG) + if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "set build type to debug" FORCE) + endif() +else() + if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "set build type to release" FORCE) + endif() + if(BUILD_FOR_ANDROID_COMMAND) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -s -Wl,--gc-sections") + endif() +endif() + 
+if(TNN_TEST_ENABLE) + option(TNN_METAL_FLOAT32 "Enable Metal Float32" ON) +else() + set(TNN_UNIT_TEST_ENABLE OFF) +endif() + +if(TNN_UNIT_TEST_ENABLE) + enable_testing() + set(TNN_CPU_ENABLE ON) + set(TNN_SYMBOL_HIDE OFF) + add_definitions(-DGENERATE_RESOURCE) +endif() + +if(TNN_CONVERTER_ENABLE OR TNN_ONNX2TNN_ENABLE) + set(TNN_SYMBOL_HIDE OFF) + add_definitions(-DTNN_CONVERTER_RUNTIME) +endif() + +if(TNN_QUANTIZATION_ENABLE OR TNN_MODEL_CHECK_ENABLE) + set(TNN_SYMBOL_HIDE OFF) + add_definitions(-DFORWARD_CALLBACK_ENABLE) +endif() + +if(TNN_QUANTIZATION_ENABLE OR TNN_UNIT_TEST_ENABLE) + add_definitions(-DGET_INTERP_ENABLE) +endif() + +if(TNN_MODEL_CHECK_ENABLE) + option(TNN_METAL_FLOAT32 "Enable Metal Float32" ON) +endif() + +if(TNN_ARM82_ENABLE) + add_definitions(-DTNN_ARM82=1) +endif() + +# only used to simulate arm82 computation in the unit test +option(TNN_ARM82_SIMU "Enable arm82 simulation" OFF) +if(TNN_ARM82_SIMU) + add_definitions(-DTNN_ARM82_SIMU) +endif() + +if(TNN_METAL_FLOAT32) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTNN_METAL_FULL_PRECISION=1") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTNN_METAL_FULL_PRECISION=1") + + if(TNN_PROFILER_ENABLE OR TNN_MODEL_CHECK_ENABLE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTNN_METAL_BENCHMARK=1 -DTNN_METAL_DEBUG=1") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTNN_METAL_BENCHMARK=1 -DTNN_METAL_DEBUG=1") + endif() +endif() + +if(TNN_OPENMP_ENABLE) + FIND_PACKAGE(OpenMP REQUIRED) + if(OPENMP_FOUND) + if(MSVC) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /openmp") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") + else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + include_directories(${OpenMP_C_INCLUDE_DIRS} ${OpenMP_CXX_INCLUDE_DIRS}) + link_libraries(${OpenMP_C_LIBRARIES} ${OpenMP_CXX_LIBRARIES}) + endif() + else() + error("OpenMP Not Found.") + endif() +endif() + + +if(UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") + 
if(TNN_GLIBCXX_USE_CXX11_ABI_ENABLE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") + endif() +endif() + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +if(TNN_METAL_ENABLE) + add_compile_options(-x objective-c++) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fobjc-arc") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fobjc-arc") +endif() + +if(TNN_TNN2MEM_ENABLE) + add_subdirectory(tools/tnn2mem) +endif() + +message(STATUS ">>>>>>>>>>>>>") +message(STATUS "TNN BUILD INFO:") +message(STATUS "\tSystem: ${CMAKE_SYSTEM_NAME}") +message(STATUS "\tProcessor: ${CMAKE_SYSTEM_PROCESSOR}") +message(STATUS "\tCpu:\t${TNN_CPU_ENABLE}") +message(STATUS "\tX86:\t${TNN_X86_ENABLE}") +message(STATUS "\tArm:\t${TNN_ARM_ENABLE}") +message(STATUS "\tArm82:\t${TNN_ARM82_ENABLE}") +message(STATUS "\tMetal:\t${TNN_METAL_ENABLE}") +message(STATUS "\tOpenCL:\t${TNN_OPENCL_ENABLE}") +message(STATUS "\tCUDA:\t${TNN_CUDA_ENABLE}") +message(STATUS "\tDSP:\t${TNN_DSP_ENABLE}") +message(STATUS "\tAtlas:\t${TNN_ATLAS_ENABLE}") +message(STATUS "\tTensorRT:\t${TNN_TENSORRT_ENABLE}") +message(STATUS "\tHuaweiNPU:\t${TNN_HUAWEI_NPU_ENABLE}") +message(STATUS "\tRKNPU:\t${TNN_RK_NPU_ENABLE}") +message(STATUS "\tOpenVINO:\t${TNN_OPENVINO_ENABLE}") +message(STATUS "\tOpenMP:\t${TNN_OPENMP_ENABLE}") +message(STATUS "\tTEST:\t${TNN_TEST_ENABLE}") +message(STATUS "\t--Unit Test:\t${TNN_UNIT_TEST_ENABLE}") +message(STATUS "\tQuantization:\t${TNN_QUANTIZATION_ENABLE}") +message(STATUS "\tModelCheck:\t${TNN_MODEL_CHECK_ENABLE}") +message(STATUS "\tDEBUG:\t${DEBUG}") +message(STATUS "\tPROFILE:\t${TNN_PROFILER_ENABLE}") +message(STATUS "\tBENCHMARK:\t${TNN_BENCHMARK_MODE}") +message(STATUS "\tBENCHMARK Layer:\t${TNN_UNIT_TEST_BENCHMARK}") +message(STATUS "\tModel Converter:\t${TNN_CONVERTER_ENABLE}") +message(STATUS "\tONNX2TNN Converter:\t${TNN_ONNX2TNN_ENABLE}") +message(STATUS 
"\tTNN2MEM:\t${TNN_TNN2MEM_ENABLE}") +message(STATUS "\tBENCHMARK Test Lib:\t${TNN_BUILD_BENCHMARK_TEST_LIB_ENABLE}") + +include_directories(include) +include_directories(source) + +file(GLOB_RECURSE SRC "source/tnn/core/*.h" + "source/tnn/core/*.cc" + "source/tnn/layer/*.h" + "source/tnn/layer/*.cc" + "source/tnn/utils/*.h" + "source/tnn/utils/*.cc" + "source/tnn/interpreter/*.h" + "source/tnn/interpreter/*.cc" + "source/tnn/optimizer/*.h" + "source/tnn/optimizer/*.cc" + "source/tnn/extern_wrapper/*.h" + "source/tnn/extern_wrapper/*.cc" + "source/tnn/memory_manager/*.h" + "source/tnn/memory_manager/*.cc") + +if(TNN_SYMBOL_HIDE AND UNIX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden") +endif() + +if(TNN_X86_ENABLE) + add_subdirectory(source/tnn/device/x86) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_CPU_ENABLE) + add_subdirectory(source/tnn/device/cpu) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_ARM_ENABLE) + add_subdirectory(source/tnn/device/arm) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") + if(TNN_ARM82_ENABLE) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") + endif() +endif() + +if(TNN_OPENVINO_ENABLE) + add_subdirectory(source/tnn/network/openvino) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_OPENCL_ENABLE) + include(FindPythonInterp REQUIRED) + if (NOT PYTHON_EXECUTABLE) + message (FATAL_ERROR "No Python installation found! 
It is required by OpenCL codegen.") + endif () + + if(SHARING_MEM_WITH_OPENGL) + add_definitions(-DSHARING_MEM_WITH_OPENGL) + add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=120) + endif() + add_subdirectory(source/tnn/device/opencl) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_METAL_ENABLE) + add_subdirectory(source/tnn/device/metal) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_CUDA_ENABLE) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") + add_subdirectory(source/tnn/device/cuda) + if(TNN_TENSORRT_ENABLE) + add_subdirectory(source/tnn/network/tensorrt) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") + endif() + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_HUAWEI_NPU_ENABLE) + if(ANDROID_ABI STREQUAL "armeabi-v7a") + link_directories( + third_party/huawei_npu/hiai_ddk_latest/armeabi-v7a/ + ) + else() + link_directories( + third_party/huawei_npu/hiai_ddk_latest/arm64-v8a/ + ) + endif() + add_subdirectory(source/tnn/device/huawei_npu) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_RK_NPU_ENABLE) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) + link_directories( + ./third_party/rknpu/rknpu_ddk/lib64/ + ) + else() + link_directories( + ./third_party/rknpu/rknpu_ddk/lib/ + ) + endif() + add_subdirectory(source/tnn/device/rknpu) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_BUILD_SHARED) + add_library(TNN SHARED ${SRC} ${TARGET_OBJECTS}) + set_target_properties(TNN PROPERTIES VERSION ${TNN_VERSION} SOVERSION ${TNN_MAJOR_VERSION}) + if(SHARING_MEM_WITH_OPENGL) + target_link_libraries(TNN -lEGL -lGLESv2) + endif() +else() + add_library(TNN STATIC ${SRC} ${TARGET_OBJECTS}) + set_target_properties(TNN PROPERTIES VERSION ${TNN_VERSION}) + if(SHARING_MEM_WITH_OPENGL) + target_link_libraries(TNN -lEGL -lGLESv2) + endif() +endif() + +if(TNN_QUANTIZATION_ENABLE) + add_subdirectory(tools/quantization) +endif() + +if(SYSTEM.Linux) + include(platforms/linux/CMakeLists.txt) 
+elseif(SYSTEM.Android) + include(platforms/android/CMakeLists.txt) +elseif(SYSTEM.iOS) + include(platforms/ios/CMakeLists.txt) +elseif(SYSTEM.Darwin) + include(platforms/mac/CMakeLists.txt) +elseif(SYSTEM.Windows) + include(platforms/windows/CMakeLists.txt) +endif() + +if (TNN_TEST_ENABLE OR TNN_CONVERTER_ENABLE OR TNN_MODEL_CHECK_ENABLE) + set(TNN_USE_GFLAGS ON) +endif () + +if (TNN_USE_GFLAGS) + add_subdirectory(third_party/gflags) + get_target_property(GFLAGS_INCLUDE_DIRS gflags INTERFACE_INCLUDE_DIRECTORIES) + include_directories(BEFORE "${GFLAGS_INCLUDE_DIRS}") +endif () + +if(TNN_MODEL_CHECK_ENABLE) + add_subdirectory(tools/model_check) +endif() + +if(TNN_TEST_ENABLE) + add_subdirectory(test) +endif() + +if(TNN_CONVERTER_ENABLE) + add_subdirectory(third_party/flatbuffers) + add_subdirectory(tools/converter) +endif() + +if(TNN_ONNX2TNN_ENABLE) + add_subdirectory(tools/onnx2tnn/onnx-converter) +endif() + +if(TNN_COVERAGE) + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-instr-generate -fcoverage-mapping") + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -coverage -fprofile-arcs -ftest-coverage") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -coverage -lgcov") + endif() +endif() diff --git a/3rdparty/TNN/Dockerfile b/3rdparty/TNN/Dockerfile new file mode 100644 index 0000000..30d5296 --- /dev/null +++ b/3rdparty/TNN/Dockerfile @@ -0,0 +1,39 @@ +FROM ubuntu:18.04 + +ENV LANG C.UTF-8 +ENV LANGUAGE C.UTF-8 +ENV LC_ALL C.UTF-8 + +RUN sed -i s@/archive.ubuntu.com/@/mirrors.tencent.com/@g /etc/apt/sources.list +RUN sed -i s@/security.ubuntu.com/@/mirrors.tencent.com/@g /etc/apt/sources.list + +RUN apt-get clean && apt-get update -y && apt-get -y install --no-install-recommends apt-utils + +RUN apt-get -y install git cmake make + +RUN apt-get -y install gcc g++ + +RUN apt-get -y install protobuf-compiler libprotobuf-dev + +RUN apt-get -y install python3 python3-dev 
python3-pip + +#RUN mkdir -p /root/.pip && echo "[global]\n index-url = https://mirrors.tencent.com/pypi/simple/" >> /root/.pip/pip.conf + +RUN python3 -m pip install --upgrade pip && pip3 install -U onnx==1.6.0 onnxruntime numpy onnx-simplifier setuptools protobuf + + +RUN pip3 install tensorflow==1.15.0 tf2onnx + +ENV TNN_ROOT=/opt/TNN +ENV TOOLS_ROOT=$TNN_ROOT/tools +# COPY ./onnx2tnn $TOOLS_ROOT/onnx2tnn +# COPY ./caffe2onnx $TOOLS_ROOT/caffe2onnx +# COPY ./convert2tnn $TOOLS_ROOT/convert2tnn +COPY . $TNN_ROOT/ +#RUN cd $TOOLS_ROOT/onnx2tnn/onnx-converter && ./build.sh +RUN cd $TOOLS_ROOT/convert2tnn && bash ./build.sh + + +RUN python3 $TOOLS_ROOT/convert2tnn/converter.py -h + +WORKDIR $TOOLS_ROOT/convert2tnn/ diff --git a/3rdparty/TNN/LICENSE b/3rdparty/TNN/LICENSE new file mode 100644 index 0000000..e8a499e Binary files /dev/null and b/3rdparty/TNN/LICENSE differ diff --git a/3rdparty/TNN/README.md b/3rdparty/TNN/README.md new file mode 100644 index 0000000..de1be0f --- /dev/null +++ b/3rdparty/TNN/README.md @@ -0,0 +1,132 @@ +[中文版本](README_CH.md) +
+ +## Introduction + +TNN: A high-performance, lightweight neural network inference framework open sourced by Tencent Youtu Lab. It also has many outstanding advantages such as cross-platform, high performance, model compression, and code tailoring. The TNN framework further strengthens the support and performance optimization of mobile devices on the basis of the original Rapidnet and ncnn frameworks. At the same time, it refers to the high performance and good scalability characteristics of the industry's mainstream open source frameworks, and expands the support for X86 and NV GPUs. On the mobile phone, TNN has been used by many applications such as mobile QQ, weishi, and Pitu. As a basic acceleration framework for Tencent Cloud AI, TNN has provided acceleration support for the implementation of many businesses. Everyone is welcome to participate in the collaborative construction to promote the further improvement of the TNN reasoning framework. + +## Effect Example + +Face Detection(blazeface) | Object Detection(yolov5s) | Face Alignment
(from Tencent Youtu Lab) | Hair Segmentation
(from Tencent Guangying Lab) +:-------------------------: | :------: | :------: | :------: +[![face_detection](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/face_detection.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/blazeface)
model link: [tflite](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front.tflite) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/blazeface) | [![yolov5](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/object-detection.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/yolov5)
model link: [onnx](https://github.com/ultralytics/yolov5/blob/master/models/export.py) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/yolov5) | [![youtu_face_alignment](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/face_alignment.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/youtu_face_alignment)
model link: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/youtu_face_alignment) | [![hair_segmentation](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/hair_seg_red.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/hair_segmentation)
model link: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/hair_segmentation) + +Pose Estimation
(from Tencent Guangliu) | Pose Estimation
(blazepose) | Chinese OCR | Reading Comprehension +:--------------------------: | :------: | :------: | :------: +[![skeleton](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/skeleton_guangliu.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/skeleton)
model link: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/skeleton) | [![blazepose](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/skeleton_blazepose.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/blazepose)
model link: [tflite](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_landmark/pose_landmark_full_body.tflite) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/blazepose) | [![chinese-ocr](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/chinese-ocr.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/chinese-ocr)
model link: [onnx](https://github.com/DayBreak-u/chineseocr_lite/tree/onnx/models) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/chinese-ocr) | [![bertsquad10](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/bert_squad.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/bertsquad10)
model link: [onnx](https://github.com/onnx/models/blob/master/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/bertsquad10) + +Chinese OCR demo is the TNN implementation of [chineseocr_lite](https://github.com/DayBreak-u/chineseocr_lite) project. It is lightweight and supports tilted, rotated and vertical text recognition. + +The support for each demo is shown in the following table. You can click the ✅ and find the entrance code for each demo. +demo | ARM | OpenCL | Metal | NPU | X86 | CUDA +:---------------------------------------------------------------------------------------: | :------: | :------: | :------: | :------: | :------: | :------: +[Face Detection](https://github.com/Tencent/TNN/blob/master/examples/base/blazeface_detector.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceDetector) | [✅ ](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNBlazeFaceDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceDetector) | | +[Object Detection](https://github.com/Tencent/TNN/blob/master/examples/base/object_detector_yolo.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamObjectDetector) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNYoloObjectDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamObjectDetector) | | +[Face Alignment](https://github.com/Tencent/TNN/blob/master/examples/base/face_detect_aligner.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceAlign) | 
[✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNFaceDetectAlignerViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceAlign) | | +[Hair Segmentation](https://github.com/Tencent/TNN/blob/master/examples/base/hair_segmentation.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamHairSegmentation) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNHairSegmentationViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamHairSegmentation) | | +[Pose Estimation
(from Tencent Guangliu)](https://github.com/Tencent/TNN/blob/master/examples/base/skeleton_detector.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamSkeletonDetector) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNSkeletonDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamSkeletonDetector) | | +[Pose Estimation(blazepose)](https://github.com/Tencent/TNN/blob/master/examples/base/pose_detect_landmark.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamPoseDetectLandmark) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNPoseDetectLandmarkViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamPoseDetectLandmark) | | | +[Chinese OCR](https://github.com/Tencent/TNN/blob/master/examples/base/ocr_driver.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamOCRDetector) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNOCRViewModel.mm) | | | +[Reading Comprehension](https://github.com/Tencent/TNN/blob/master/examples/base/bert_tokenizer.cc) | | | | | [✅](https://github.com/Tencent/TNN/blob/master/examples/linux/src/BertReadingComprehension/BertReadingComprehension.cc) | [✅](https://github.com/Tencent/TNN/blob/master/examples/linux/src/BertReadingComprehension/BertReadingComprehension.cc) + +## Quick Start + +It is very simple to use TNN. If you have a trained model, the model can be deployed on the target platform through three steps. +1. Convert the trained model into a TNN model. 
We provide a wealth of tools to help you complete this step, whether you are using Tensorflow, Pytorch, or Caffe, you can easily complete the conversion. +Detailed hands-on tutorials can be found here [How to Create a TNN Model](doc/en/user/convert_en.md). + +2. When you have finished converting the model, the second step is to compile the TNN engine of the target platform. You can choose among different acceleration solutions such as ARM/OpenCL/Metal/NPU/X86/CUDA according to the hardware support. + For these platforms, TNN provides convenient one-click scripts to compile. For detailed steps, please refer to [How to Compile TNN](doc/en/user/compile_en.md). + +3. The final step is to use the compiled TNN engine for inference. You can make program calls to TNN inside your application. We provide a rich and detailed demo as a reference to help you complete. + * [Run an iOS Demo](doc/en/user/demo_en.md) + * [Run an Android Demo](doc/en/user/demo_en.md) + * [Run an Linux/Windows Demo](doc/en/user/demo_en.md) + +## Technical Solutions + +At present, TNN has been launched in various major businesses, and its following characteristics have been widely praised. + +* Computation optimization + * The backend operators are primely optimized to make the best use of computing power in different architectures, regarding instruction issue, throughput, delay, cache bandwidth, cache delay, registers, etc.. + * The TNN performance on mainstream hardware platforms (CPU: ARMv7, ARMv8, X86, GPU: Mali, Adreno, Apple, NV GPU, NPU) has been greatly tuned and improved. + * The convolution function is implemented by various algorithms such as Winograd, Tile-GEMM, Direct Conv, etc., to ensure efficiency under different parameters and sizes. + * Op fusion: TNN can do offline analysis of network graph, fuse multiple simple operations and reduce overhead such as redundant memory access and kernel startup cost. 
+ +* Low precision computation acceleration + * TNN supports INT8/FP16 mode, reduces model size & memory consumption, and utilizes specific hardware low-precision instructions to accelerate calculations. + * TNN supports INT8 WINOGRAD algorithm (input 6bit), further reduces the model calculation complexity without sacrificing the accuracy. + * TNN supports mixed-precision data in one model, speeding up the model's calculation while preserving its accuracy. + +* Memory optimization + * Efficient "memory pool" implementation: Based on a full network DAG analysis, the implementation reuses memory between non-dependent nodes which reduces memory cost by 90%. + * Cross-model memory reuse: This supports external real-time specification of network memory so that multiple models can share the same memory. + +* The performance of mainstream models on TNN: [benchmark data](doc/benchmark_data.md) + +* TNN architecture diagram: + +
+ +* TNN supports TensorFlow, PyTorch, MXNet, Caffe, and other training frameworks through ONNX, leveraging the continuous improvement of the ONNX open-source community. + Currently, TNN supports 100+ ONNX operators, covering most of the mainstream CNN and NLP operators needed. +* TNN runs on mainstream operating systems (Android, iOS, embedded Linux, Windows, Linux), and is compatible with ARM CPU, X86, GPU, NPU hardware platforms. +* TNN is constructed through Modular Design, which abstracts and isolates components such as model analysis, graph construction, graph optimization, low-level hardware adaptation, and high-performance kernel. + It uses "Factory Mode" to register and build devices, which minimizes the cost of supporting more hardware and acceleration solutions. +* The size of the mobile dynamic library is only around 400KB, and it provides basic image conversion operations, which are light-weight and convenient. TNN uses unified models and interfaces across platforms and can switch easily by configuring just one single parameter. 
+ +## Learn About TNN Abilities +* [Operator Support](doc/en/user/support_en.md) +* [Model Support](doc/en/user/support_en.md) +* [Device Support](doc/en/user/support_en.md) +* [Profiling](doc/en/development/profiling_en.md) + +## Manual +* [Compile TNN](doc/en/user/compile_en.md) +* [Tools]() + * [Create a TNN Model](doc/en/user/convert_en.md) + * [Model Quantization](doc/en/user/quantization_en.md) + * [Model Visualization Netron](https://lutzroeder.github.io/netron/) + * [Performance Analysis](doc/en/development/profiling_en.md) + * [Model Alignment](doc/en/development/model_check_en.md) + +## API Document +* [API call](doc/en/user/api_en.md) + +## Contribute to TNN +* [Development Basics](doc/en/development/contributing_en.md) +* [Detailed Architecture](doc/en/development/architecture_en.md) +* [Add a New Operator](doc/en/development/add_op_en.md) +* [Unit Test](doc/en/development/unit_test_en.md) + +## Roadmap +* [Road map](doc/cn/user/roadmap.md) + +## Acknowledgement +TNN referenced the following projects: + +* [ncnn](https://github.com/Tencent/ncnn) +* [mace](https://github.com/XiaoMi/mace.git) +* [MNN](https://github.com/alibaba/MNN) +* [caffe-onnx](https://github.com/htshinichi/caffe-onnx) +* [tensorflow-onnx](https://github.com/onnx/tensorflow-onnx) +* [onnx](https://github.com/onnx/onnx) +* [onnxruntime](https://github.com/microsoft/onnxruntime) +* [openvino](https://github.com/openvinotoolkit/openvino) +* [xbyak](https://github.com/herumi/xbyak) +* [TensorRT](https://developer.nvidia.com/tensorrt) + +## License +* [BSD 3 Clause](LICENSE) + +## FAQ +* [FAQ](doc/en/faq_en.md) + +## Join Us + +* Everyone is welcome to participate to build the best inference framework in the industry. + +* Technical Discussion QQ Group: 913940506 Answer: TNN + +* Scan the QR code to join the TNN discussion group: +
diff --git a/3rdparty/TNN/README_CH.md b/3rdparty/TNN/README_CH.md new file mode 100644 index 0000000..5e53c38 --- /dev/null +++ b/3rdparty/TNN/README_CH.md @@ -0,0 +1,132 @@ +[English Version](README.md) +
+ +## 简介 + +TNN:由腾讯优图实验室开源的高性能、轻量级神经网络推理框架,同时拥有跨平台、高性能、模型压缩、代码裁剪等众多突出优势。TNN框架在原有Rapidnet、ncnn框架的基础上进一步加强了移动端设备的支持以及性能优化,同时借鉴了业界主流开源框架高性能和良好拓展性的特性,拓展了对于后台X86, NV GPU的支持。手机端 TNN已经在手Q、微视、P图等众多应用中落地,服务端TNN作为腾讯云AI基础加速框架已为众多业务落地提供加速支持。欢迎大家参与协同共建,促进TNN推理框架进一步完善。 + + +## 效果示例 + +人脸检测(blazeface) | 物体检测(yolov5s) | 人脸配准(腾讯优图) | 头发分割(腾讯光影) +:-------------------------: | :------: | :------: | :------: +[![face_detection](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/face_detection.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/blazeface)
模型链接: [tflite](https://github.com/google/mediapipe/blob/master/mediapipe/models/face_detection_front.tflite) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/blazeface) | [![yolov5](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/object-detection.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/yolov5)
模型链接: [onnx](https://github.com/ultralytics/yolov5/blob/master/models/export.py) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/yolov5) | [![youtu_face_alignment](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/face_alignment.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/youtu_face_alignment)
模型链接: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/youtu_face_alignment) | [![hair_segmentation](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/hair_seg_red.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/hair_segmentation)
模型链接: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/hair_segmentation) + +姿势估计(腾讯光流) | 姿势估计(blazepose) | 中文字符识别 | 阅读理解 +:--------------------------: | :------: | :------: | :------: +[![skeleton](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/skeleton_guangliu.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/skeleton)
模型链接: [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/skeleton) | [![blazepose](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/skeleton_blazepose.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/blazepose)
模型链接: [tflite](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_landmark/pose_landmark_full_body.tflite) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/blazepose) | [![chinese-ocr](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/chinese-ocr.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/chinese-ocr)
模型链接: [onnx](https://github.com/DayBreak-u/chineseocr_lite/tree/onnx/models) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/chinese-ocr) | [![bertsquad10](https://raw.githubusercontent.com/darrenyao87/tnn-models/master/doc/demo/bert_squad.gif)](https://github.com/darrenyao87/tnn-models/tree/master/model/bertsquad10)
模型链接: [onnx](https://github.com/onnx/models/blob/master/text/machine_comprehension/bert-squad/model/bertsquad-10.onnx) [tnn](https://github.com/darrenyao87/tnn-models/tree/master/model/bertsquad10) + +中文字符识别demo是[chineseocr_lite](https://github.com/DayBreak-u/chineseocr_lite)的TNN实现,是一个超轻量级的中文ocr,支持倾斜、旋转和竖排文字识别。 + +各个平台对demo的支持情况如下表所示,单击✅标记,便可以跳转至对应demo的入口代码。 +demo | ARM | OpenCL | Metal | NPU | X86 | CUDA +:---------------------------------------------------------------------------------------: | :------: | :------: | :------: | :------: | :------: | :------: +[人脸检测](https://github.com/Tencent/TNN/blob/master/examples/base/blazeface_detector.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceDetector) | [✅ ](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNBlazeFaceDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceDetector) | | +[物体检测](https://github.com/Tencent/TNN/blob/master/examples/base/object_detector_yolo.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamObjectDetector) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNYoloObjectDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamObjectDetector) | | +[人脸配准](https://github.com/Tencent/TNN/blob/master/examples/base/face_detect_aligner.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceAlign) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNFaceDetectAlignerViewModel.mm) | 
[✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamBlazeFaceAlign) | | +[头发分割](https://github.com/Tencent/TNN/blob/master/examples/base/hair_segmentation.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamHairSegmentation) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNHairSegmentationViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamHairSegmentation) | | +[姿势估计(腾讯光流)](https://github.com/Tencent/TNN/blob/master/examples/base/skeleton_detector.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamSkeletonDetector) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNSkeletonDetectorViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamSkeletonDetector) | | +[姿势估计(blazepose)](https://github.com/Tencent/TNN/blob/master/examples/base/pose_detect_landmark.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamPoseDetectLandmark) | [✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNPoseDetectLandmarkViewModel.mm) | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamPoseDetectLandmark) | | +[中文字符识别](https://github.com/Tencent/TNN/blob/master/examples/base/ocr_driver.cc) | ✅ | [✅](https://github.com/Tencent/TNN/tree/master/examples/android/demo/src/main/java/com/tencent/tnn/demo/StreamOCRDetector) | 
[✅](https://github.com/Tencent/TNN/blob/master/examples/ios/TNNExamples/TNNCameraPreviewController/TNNViewModel/TNNOCRViewModel.mm) | | | +[阅读理解](https://github.com/Tencent/TNN/blob/master/examples/base/bert_tokenizer.cc) | | | | | [✅](https://github.com/Tencent/TNN/blob/master/examples/linux/src/BertReadingComprehension/BertReadingComprehension.cc) | [✅](https://github.com/Tencent/TNN/blob/master/examples/linux/src/BertReadingComprehension/BertReadingComprehension.cc) + +## 快速开始 + +使用 TNN 非常简单,如果你有一个已经训练好的模型, 那么一般而言通过以下三个步骤就能完成模型在目标平台上的部署。 +1. 第一步是把训练好的模型转换成TNN的模型,为此我们提供了丰富的工具来帮助你完成这一步,无论你使用的是 TensorFlow、PyTorch、或者 Caffe,都可以轻松完成转换。 +详细的手把手教程可以参见这里[如何转换模型](doc/cn/user/convert.md)。 + +2. 当你完成了模型的转换,第二步就是编译目标平台的 TNN 引擎了,你可以根据自己的目标平台的硬件支持情况,选择 CPU/ARM/OpenCL/Metal/NPU/X86/CUDA 等加速方案。 + 对于这些平台,TNN 都提供了一键编译的脚本,使用非常方便。详细步骤可以参考这里[如何编译TNN](doc/cn/user/compile.md)。 + +3. 最后一步就是使用编译好的 TNN 引擎进行推理,你可以在自己的应用程序中嵌入对 TNN 的调用,这方面我们提供了丰富而详实的 demo 来帮助你完成。 + * [从0开始跑通一个iOS Demo](doc/cn/user/demo.md) + * [从0开始跑通一个Android Demo](doc/cn/user/demo.md) + * [从0开始跑通一个Windows/Linux Demo](doc/cn/user/demo.md#四) + +## 技术方案 + +目前TNN具有的以下特性获得了广泛的好评。 + +* 计算优化 + * 针对不同架构在硬件指令发射、吞吐、延迟、缓存带宽、缓存延迟、寄存器数量等特点,深度优化底层算子,极致利用硬件算力 + * 主流硬件平台(CPU: ARMv7, ARMv8,X86 GPU: Mali, Adreno, Apple, NV GPU) 深度调优 + * CNN 核心卷积运算通过 Winograd,Tile-GEMM, Direct Conv 等多种算法实现,保证不同参数、计算尺度下高效计算 + * Op 融合:离线分析网络计算图,多个小 Op(计算量小、功能较简单)融合运算,减少反复内存读取、kernel 启动等开销 + +* 低精度优化 + * 支持 INT8, FP16 低精度计算,减少模型大小、内存消耗,同时利用硬件低精度计算指令加速计算 + * 支持 INT8 Winograd 算法,(输入6bit), 在精度满足要求的情况下,进一步降低模型计算复杂度 + * 支持单模型多种精度混合计算,加速计算同时保证模型精度 + +* 内存优化 + * 高效”内存池”实现:通过 DAG 网络计算图分析,实现无计算依赖的节点间复用内存,降低 90% 内存资源消耗 + * 跨模型内存复用:支持外部实时指定用于网络内存,实现“多个模型,单份内存”。 + +* 主流模型实测性能:[评测数据](doc/benchmark_data.md) + +* TNN架构图: + +
+ +* 通过 ONNX 支持 TensorFlow, PyTorch, MXNet, Caffe 等多种训练框架,充分利用和融入不断完善的 ONNX 开源生态。当前支持 ONNX 算子100+,覆盖主流CNN, NLP网络。 +* 支持主流安卓、iOS、Embedded Linux 操作系统, Windows, Linux,支持 ARM CPU, x86, Mali GPU, Adreno GPU, NV GPU, 达芬奇NPU,RK NPU。 +* 模块化设计,将模型解析、计算图构建、优化、底层硬件适配、高性能 kernel 实现各部分抽象隔离,通过 Factory Mode 注册、构建设备,方便接入更多的底层硬件、加速方案。 +* 移动端动态库尺寸仅约 400KB,并提供基础图像变换操作,调用简单便捷。跨平台模型统一、调用接口统一,通过单个配置参数快速切换。 + +## 能力展示 +* [支持的算子](doc/cn/user/support.md) +* [支持的网络](doc/cn/user/support.md) +* [支持的架构](doc/cn/user/support.md) +* [Benchmark性能测试方法](doc/cn/development/profiling.md) + +## 使用手册 +* [从源码编译](doc/cn/user/compile.md) +* [工具集]() + * [模型转换](doc/cn/user/convert.md) + * [模型量化](doc/cn/user/quantization.md) + * [模型可视化Netron](https://lutzroeder.github.io/netron/) + * [性能分析工具](doc/cn/development/profiling.md) + * [模型对齐工具](doc/cn/development/model_check.md) + +## API文档 +* [API调用](doc/cn/user/api.md) + +## 贡献者须知 +* [开发基础须知](doc/cn/development/contributing.md) +* [架构详解](doc/cn/development/architecture.md) +* [新增OP](doc/cn/development/add_op.md) +* [单元测试](doc/cn/development/unit_test.md) + +## Roadmap +* [Road map](doc/cn/user/roadmap.md) + +## 致谢 +TNN参考和借鉴了下列项目: + +* [ncnn](https://github.com/Tencent/ncnn) +* [mace](https://github.com/XiaoMi/mace.git) +* [MNN](https://github.com/alibaba/MNN) +* [caffe-onnx](https://github.com/htshinichi/caffe-onnx) +* [tensorflow-onnx](https://github.com/onnx/tensorflow-onnx) +* [onnx](https://github.com/onnx/onnx) +* [onnxruntime](https://github.com/microsoft/onnxruntime) +* [openvino](https://github.com/openvinotoolkit/openvino) +* [xbyak](https://github.com/herumi/xbyak) +* [TensorRT](https://developer.nvidia.com/zh-cn/tensorrt) + +## License + +* [BSD 3 Clause](LICENSE) + +## FAQ +* [FAQ 常见问题](doc/cn/faq.md) + +## 加入我们 + +* 欢迎大家参与,协同共建,打造业界最好的高性能推理框架。 + +* 技术交流 QQ 群: 913940506 答案:TNN + +* QQ 群二维码: +
diff --git a/3rdparty/TNN/RELEASE.md b/3rdparty/TNN/RELEASE.md new file mode 100644 index 0000000..e69de29 diff --git a/3rdparty/TNN/TNN-QQ.png b/3rdparty/TNN/TNN-QQ.png new file mode 100644 index 0000000..cc236ff Binary files /dev/null and b/3rdparty/TNN/TNN-QQ.png differ diff --git a/3rdparty/TNN/TNN.png b/3rdparty/TNN/TNN.png new file mode 100644 index 0000000..b07ecc7 Binary files /dev/null and b/3rdparty/TNN/TNN.png differ diff --git a/3rdparty/TNN/_config.yml b/3rdparty/TNN/_config.yml new file mode 100644 index 0000000..c419263 --- /dev/null +++ b/3rdparty/TNN/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-cayman \ No newline at end of file diff --git a/3rdparty/TNN/benchmark/.gitignore b/3rdparty/TNN/benchmark/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/3rdparty/TNN/benchmark/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/3rdparty/TNN/benchmark/benchmark-model/densenet.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/densenet.tnnproto new file mode 100644 index 0000000..8464dc5 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/densenet.tnnproto @@ -0,0 +1,315 @@ +"1 0 1 4206624770 ," +"Placeholder 1 3 224 224 ," +" ," +"softmax_tensor ," +" 310 ," +"Convolution Relu 1 1 Placeholder Relu 1 3 64 7 7 2 2 0 0 1 0 1 1 1 ," +"Pooling max_pooling2d/MaxPool 1 1 Relu max_pooling2d/MaxPool 0 3 3 2 2 0 0 -1 -1 0 1 ," +"Mul block-0/denseblock-0-0/batch_normalization/FusedBatchNorm_mul_0 1 1 max_pooling2d/MaxPool block-0/denseblock-0-0/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-0/Relu 1 1 block-0/denseblock-0-0/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-0/Relu_output 1 ," +"ReLU block-0/denseblock-0-0/Relu_activation 1 1 block-0/denseblock-0-0/Relu_output block-0/denseblock-0-0/Relu ," +"Convolution block-0/denseblock-0-0/conv2d/Conv2D 1 1 block-0/denseblock-0-0/Relu block-0/denseblock-0-0/conv2d/Conv2D 1 64 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-0/concat 2 1 
max_pooling2d/MaxPool block-0/denseblock-0-0/conv2d/Conv2D block-0/denseblock-0-0/concat 1 ," +"Mul block-0/denseblock-0-1/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-0/concat block-0/denseblock-0-1/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-1/Relu 1 1 block-0/denseblock-0-1/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-1/Relu_output 1 ," +"ReLU block-0/denseblock-0-1/Relu_activation 1 1 block-0/denseblock-0-1/Relu_output block-0/denseblock-0-1/Relu ," +"Convolution block-0/denseblock-0-1/conv2d/Conv2D 1 1 block-0/denseblock-0-1/Relu block-0/denseblock-0-1/conv2d/Conv2D 1 96 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-1/concat 2 1 block-0/denseblock-0-0/concat block-0/denseblock-0-1/conv2d/Conv2D block-0/denseblock-0-1/concat 1 ," +"Mul block-0/denseblock-0-2/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-1/concat block-0/denseblock-0-2/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-2/Relu 1 1 block-0/denseblock-0-2/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-2/Relu_output 1 ," +"ReLU block-0/denseblock-0-2/Relu_activation 1 1 block-0/denseblock-0-2/Relu_output block-0/denseblock-0-2/Relu ," +"Convolution block-0/denseblock-0-2/conv2d/Conv2D 1 1 block-0/denseblock-0-2/Relu block-0/denseblock-0-2/conv2d/Conv2D 1 128 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-2/concat 2 1 block-0/denseblock-0-1/concat block-0/denseblock-0-2/conv2d/Conv2D block-0/denseblock-0-2/concat 1 ," +"Mul block-0/denseblock-0-3/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-2/concat block-0/denseblock-0-3/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-3/Relu 1 1 block-0/denseblock-0-3/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-3/Relu_output 1 ," +"ReLU block-0/denseblock-0-3/Relu_activation 1 1 block-0/denseblock-0-3/Relu_output block-0/denseblock-0-3/Relu ," +"Convolution 
block-0/denseblock-0-3/conv2d/Conv2D 1 1 block-0/denseblock-0-3/Relu block-0/denseblock-0-3/conv2d/Conv2D 1 160 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-3/concat 2 1 block-0/denseblock-0-2/concat block-0/denseblock-0-3/conv2d/Conv2D block-0/denseblock-0-3/concat 1 ," +"Mul block-0/denseblock-0-4/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-3/concat block-0/denseblock-0-4/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-4/Relu 1 1 block-0/denseblock-0-4/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-4/Relu_output 1 ," +"ReLU block-0/denseblock-0-4/Relu_activation 1 1 block-0/denseblock-0-4/Relu_output block-0/denseblock-0-4/Relu ," +"Convolution block-0/denseblock-0-4/conv2d/Conv2D 1 1 block-0/denseblock-0-4/Relu block-0/denseblock-0-4/conv2d/Conv2D 1 192 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-4/concat 2 1 block-0/denseblock-0-3/concat block-0/denseblock-0-4/conv2d/Conv2D block-0/denseblock-0-4/concat 1 ," +"Mul block-0/denseblock-0-5/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-4/concat block-0/denseblock-0-5/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/denseblock-0-5/Relu 1 1 block-0/denseblock-0-5/batch_normalization/FusedBatchNorm_mul_0 block-0/denseblock-0-5/Relu_output 1 ," +"ReLU block-0/denseblock-0-5/Relu_activation 1 1 block-0/denseblock-0-5/Relu_output block-0/denseblock-0-5/Relu ," +"Convolution block-0/denseblock-0-5/conv2d/Conv2D 1 1 block-0/denseblock-0-5/Relu block-0/denseblock-0-5/conv2d/Conv2D 1 224 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-0/denseblock-0-5/concat 2 1 block-0/denseblock-0-4/concat block-0/denseblock-0-5/conv2d/Conv2D block-0/denseblock-0-5/concat 1 ," +"Mul block-0/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/denseblock-0-5/concat block-0/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-0/Relu 1 1 block-0/batch_normalization/FusedBatchNorm_mul_0 block-0/Relu_output 1 ," +"ReLU 
block-0/Relu_activation 1 1 block-0/Relu_output block-0/Relu ," +"Convolution block-0/conv2d/Conv2D 1 1 block-0/Relu block-0/conv2d/Conv2D 1 256 128 1 1 1 1 0 0 1 0 1 1 0 ," +"Pooling block-0/average_pooling2d/AvgPool 1 1 block-0/conv2d/Conv2D block-0/average_pooling2d/AvgPool 1 2 2 2 2 0 0 -1 -1 0 1 ," +"Mul block-1/denseblock-1-0/batch_normalization/FusedBatchNorm_mul_0 1 1 block-0/average_pooling2d/AvgPool block-1/denseblock-1-0/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-0/Relu 1 1 block-1/denseblock-1-0/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-0/Relu_output 1 ," +"ReLU block-1/denseblock-1-0/Relu_activation 1 1 block-1/denseblock-1-0/Relu_output block-1/denseblock-1-0/Relu ," +"Convolution block-1/denseblock-1-0/conv2d/Conv2D 1 1 block-1/denseblock-1-0/Relu block-1/denseblock-1-0/conv2d/Conv2D 1 128 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-0/concat 2 1 block-0/average_pooling2d/AvgPool block-1/denseblock-1-0/conv2d/Conv2D block-1/denseblock-1-0/concat 1 ," +"Mul block-1/denseblock-1-1/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-0/concat block-1/denseblock-1-1/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-1/Relu 1 1 block-1/denseblock-1-1/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-1/Relu_output 1 ," +"ReLU block-1/denseblock-1-1/Relu_activation 1 1 block-1/denseblock-1-1/Relu_output block-1/denseblock-1-1/Relu ," +"Convolution block-1/denseblock-1-1/conv2d/Conv2D 1 1 block-1/denseblock-1-1/Relu block-1/denseblock-1-1/conv2d/Conv2D 1 160 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-1/concat 2 1 block-1/denseblock-1-0/concat block-1/denseblock-1-1/conv2d/Conv2D block-1/denseblock-1-1/concat 1 ," +"Mul block-1/denseblock-1-2/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-1/concat block-1/denseblock-1-2/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-2/Relu 1 1 
block-1/denseblock-1-2/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-2/Relu_output 1 ," +"ReLU block-1/denseblock-1-2/Relu_activation 1 1 block-1/denseblock-1-2/Relu_output block-1/denseblock-1-2/Relu ," +"Convolution block-1/denseblock-1-2/conv2d/Conv2D 1 1 block-1/denseblock-1-2/Relu block-1/denseblock-1-2/conv2d/Conv2D 1 192 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-2/concat 2 1 block-1/denseblock-1-1/concat block-1/denseblock-1-2/conv2d/Conv2D block-1/denseblock-1-2/concat 1 ," +"Mul block-1/denseblock-1-3/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-2/concat block-1/denseblock-1-3/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-3/Relu 1 1 block-1/denseblock-1-3/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-3/Relu_output 1 ," +"ReLU block-1/denseblock-1-3/Relu_activation 1 1 block-1/denseblock-1-3/Relu_output block-1/denseblock-1-3/Relu ," +"Convolution block-1/denseblock-1-3/conv2d/Conv2D 1 1 block-1/denseblock-1-3/Relu block-1/denseblock-1-3/conv2d/Conv2D 1 224 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-3/concat 2 1 block-1/denseblock-1-2/concat block-1/denseblock-1-3/conv2d/Conv2D block-1/denseblock-1-3/concat 1 ," +"Mul block-1/denseblock-1-4/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-3/concat block-1/denseblock-1-4/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-4/Relu 1 1 block-1/denseblock-1-4/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-4/Relu_output 1 ," +"ReLU block-1/denseblock-1-4/Relu_activation 1 1 block-1/denseblock-1-4/Relu_output block-1/denseblock-1-4/Relu ," +"Convolution block-1/denseblock-1-4/conv2d/Conv2D 1 1 block-1/denseblock-1-4/Relu block-1/denseblock-1-4/conv2d/Conv2D 1 256 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-4/concat 2 1 block-1/denseblock-1-3/concat block-1/denseblock-1-4/conv2d/Conv2D block-1/denseblock-1-4/concat 1 ," +"Mul 
block-1/denseblock-1-5/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-4/concat block-1/denseblock-1-5/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-5/Relu 1 1 block-1/denseblock-1-5/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-5/Relu_output 1 ," +"ReLU block-1/denseblock-1-5/Relu_activation 1 1 block-1/denseblock-1-5/Relu_output block-1/denseblock-1-5/Relu ," +"Convolution block-1/denseblock-1-5/conv2d/Conv2D 1 1 block-1/denseblock-1-5/Relu block-1/denseblock-1-5/conv2d/Conv2D 1 288 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-5/concat 2 1 block-1/denseblock-1-4/concat block-1/denseblock-1-5/conv2d/Conv2D block-1/denseblock-1-5/concat 1 ," +"Mul block-1/denseblock-1-6/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-5/concat block-1/denseblock-1-6/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-6/Relu 1 1 block-1/denseblock-1-6/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-6/Relu_output 1 ," +"ReLU block-1/denseblock-1-6/Relu_activation 1 1 block-1/denseblock-1-6/Relu_output block-1/denseblock-1-6/Relu ," +"Convolution block-1/denseblock-1-6/conv2d/Conv2D 1 1 block-1/denseblock-1-6/Relu block-1/denseblock-1-6/conv2d/Conv2D 1 320 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-6/concat 2 1 block-1/denseblock-1-5/concat block-1/denseblock-1-6/conv2d/Conv2D block-1/denseblock-1-6/concat 1 ," +"Mul block-1/denseblock-1-7/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-6/concat block-1/denseblock-1-7/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-7/Relu 1 1 block-1/denseblock-1-7/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-7/Relu_output 1 ," +"ReLU block-1/denseblock-1-7/Relu_activation 1 1 block-1/denseblock-1-7/Relu_output block-1/denseblock-1-7/Relu ," +"Convolution block-1/denseblock-1-7/conv2d/Conv2D 1 1 block-1/denseblock-1-7/Relu 
block-1/denseblock-1-7/conv2d/Conv2D 1 352 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-7/concat 2 1 block-1/denseblock-1-6/concat block-1/denseblock-1-7/conv2d/Conv2D block-1/denseblock-1-7/concat 1 ," +"Mul block-1/denseblock-1-8/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-7/concat block-1/denseblock-1-8/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-8/Relu 1 1 block-1/denseblock-1-8/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-8/Relu_output 1 ," +"ReLU block-1/denseblock-1-8/Relu_activation 1 1 block-1/denseblock-1-8/Relu_output block-1/denseblock-1-8/Relu ," +"Convolution block-1/denseblock-1-8/conv2d/Conv2D 1 1 block-1/denseblock-1-8/Relu block-1/denseblock-1-8/conv2d/Conv2D 1 384 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-8/concat 2 1 block-1/denseblock-1-7/concat block-1/denseblock-1-8/conv2d/Conv2D block-1/denseblock-1-8/concat 1 ," +"Mul block-1/denseblock-1-9/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-8/concat block-1/denseblock-1-9/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-9/Relu 1 1 block-1/denseblock-1-9/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-9/Relu_output 1 ," +"ReLU block-1/denseblock-1-9/Relu_activation 1 1 block-1/denseblock-1-9/Relu_output block-1/denseblock-1-9/Relu ," +"Convolution block-1/denseblock-1-9/conv2d/Conv2D 1 1 block-1/denseblock-1-9/Relu block-1/denseblock-1-9/conv2d/Conv2D 1 416 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-9/concat 2 1 block-1/denseblock-1-8/concat block-1/denseblock-1-9/conv2d/Conv2D block-1/denseblock-1-9/concat 1 ," +"Mul block-1/denseblock-1-10/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-9/concat block-1/denseblock-1-10/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-10/Relu 1 1 block-1/denseblock-1-10/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-10/Relu_output 1 ," 
+"ReLU block-1/denseblock-1-10/Relu_activation 1 1 block-1/denseblock-1-10/Relu_output block-1/denseblock-1-10/Relu ," +"Convolution block-1/denseblock-1-10/conv2d/Conv2D 1 1 block-1/denseblock-1-10/Relu block-1/denseblock-1-10/conv2d/Conv2D 1 448 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-10/concat 2 1 block-1/denseblock-1-9/concat block-1/denseblock-1-10/conv2d/Conv2D block-1/denseblock-1-10/concat 1 ," +"Mul block-1/denseblock-1-11/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-10/concat block-1/denseblock-1-11/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/denseblock-1-11/Relu 1 1 block-1/denseblock-1-11/batch_normalization/FusedBatchNorm_mul_0 block-1/denseblock-1-11/Relu_output 1 ," +"ReLU block-1/denseblock-1-11/Relu_activation 1 1 block-1/denseblock-1-11/Relu_output block-1/denseblock-1-11/Relu ," +"Convolution block-1/denseblock-1-11/conv2d/Conv2D 1 1 block-1/denseblock-1-11/Relu block-1/denseblock-1-11/conv2d/Conv2D 1 480 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-1/denseblock-1-11/concat 2 1 block-1/denseblock-1-10/concat block-1/denseblock-1-11/conv2d/Conv2D block-1/denseblock-1-11/concat 1 ," +"Mul block-1/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/denseblock-1-11/concat block-1/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-1/Relu 1 1 block-1/batch_normalization/FusedBatchNorm_mul_0 block-1/Relu_output 1 ," +"ReLU block-1/Relu_activation 1 1 block-1/Relu_output block-1/Relu ," +"Convolution block-1/conv2d/Conv2D 1 1 block-1/Relu block-1/conv2d/Conv2D 1 512 256 1 1 1 1 0 0 1 0 1 1 0 ," +"Pooling block-1/average_pooling2d/AvgPool 1 1 block-1/conv2d/Conv2D block-1/average_pooling2d/AvgPool 1 2 2 2 2 0 0 -1 -1 0 1 ," +"Mul block-2/denseblock-2-0/batch_normalization/FusedBatchNorm_mul_0 1 1 block-1/average_pooling2d/AvgPool block-2/denseblock-2-0/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-0/Relu 1 1 
block-2/denseblock-2-0/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-0/Relu_output 1 ," +"ReLU block-2/denseblock-2-0/Relu_activation 1 1 block-2/denseblock-2-0/Relu_output block-2/denseblock-2-0/Relu ," +"Convolution block-2/denseblock-2-0/conv2d/Conv2D 1 1 block-2/denseblock-2-0/Relu block-2/denseblock-2-0/conv2d/Conv2D 1 256 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-0/concat 2 1 block-1/average_pooling2d/AvgPool block-2/denseblock-2-0/conv2d/Conv2D block-2/denseblock-2-0/concat 1 ," +"Mul block-2/denseblock-2-1/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-0/concat block-2/denseblock-2-1/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-1/Relu 1 1 block-2/denseblock-2-1/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-1/Relu_output 1 ," +"ReLU block-2/denseblock-2-1/Relu_activation 1 1 block-2/denseblock-2-1/Relu_output block-2/denseblock-2-1/Relu ," +"Convolution block-2/denseblock-2-1/conv2d/Conv2D 1 1 block-2/denseblock-2-1/Relu block-2/denseblock-2-1/conv2d/Conv2D 1 288 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-1/concat 2 1 block-2/denseblock-2-0/concat block-2/denseblock-2-1/conv2d/Conv2D block-2/denseblock-2-1/concat 1 ," +"Mul block-2/denseblock-2-2/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-1/concat block-2/denseblock-2-2/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-2/Relu 1 1 block-2/denseblock-2-2/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-2/Relu_output 1 ," +"ReLU block-2/denseblock-2-2/Relu_activation 1 1 block-2/denseblock-2-2/Relu_output block-2/denseblock-2-2/Relu ," +"Convolution block-2/denseblock-2-2/conv2d/Conv2D 1 1 block-2/denseblock-2-2/Relu block-2/denseblock-2-2/conv2d/Conv2D 1 320 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-2/concat 2 1 block-2/denseblock-2-1/concat block-2/denseblock-2-2/conv2d/Conv2D block-2/denseblock-2-2/concat 1 ," +"Mul 
block-2/denseblock-2-3/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-2/concat block-2/denseblock-2-3/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-3/Relu 1 1 block-2/denseblock-2-3/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-3/Relu_output 1 ," +"ReLU block-2/denseblock-2-3/Relu_activation 1 1 block-2/denseblock-2-3/Relu_output block-2/denseblock-2-3/Relu ," +"Convolution block-2/denseblock-2-3/conv2d/Conv2D 1 1 block-2/denseblock-2-3/Relu block-2/denseblock-2-3/conv2d/Conv2D 1 352 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-3/concat 2 1 block-2/denseblock-2-2/concat block-2/denseblock-2-3/conv2d/Conv2D block-2/denseblock-2-3/concat 1 ," +"Mul block-2/denseblock-2-4/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-3/concat block-2/denseblock-2-4/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-4/Relu 1 1 block-2/denseblock-2-4/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-4/Relu_output 1 ," +"ReLU block-2/denseblock-2-4/Relu_activation 1 1 block-2/denseblock-2-4/Relu_output block-2/denseblock-2-4/Relu ," +"Convolution block-2/denseblock-2-4/conv2d/Conv2D 1 1 block-2/denseblock-2-4/Relu block-2/denseblock-2-4/conv2d/Conv2D 1 384 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-4/concat 2 1 block-2/denseblock-2-3/concat block-2/denseblock-2-4/conv2d/Conv2D block-2/denseblock-2-4/concat 1 ," +"Mul block-2/denseblock-2-5/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-4/concat block-2/denseblock-2-5/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-5/Relu 1 1 block-2/denseblock-2-5/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-5/Relu_output 1 ," +"ReLU block-2/denseblock-2-5/Relu_activation 1 1 block-2/denseblock-2-5/Relu_output block-2/denseblock-2-5/Relu ," +"Convolution block-2/denseblock-2-5/conv2d/Conv2D 1 1 block-2/denseblock-2-5/Relu 
block-2/denseblock-2-5/conv2d/Conv2D 1 416 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-5/concat 2 1 block-2/denseblock-2-4/concat block-2/denseblock-2-5/conv2d/Conv2D block-2/denseblock-2-5/concat 1 ," +"Mul block-2/denseblock-2-6/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-5/concat block-2/denseblock-2-6/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-6/Relu 1 1 block-2/denseblock-2-6/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-6/Relu_output 1 ," +"ReLU block-2/denseblock-2-6/Relu_activation 1 1 block-2/denseblock-2-6/Relu_output block-2/denseblock-2-6/Relu ," +"Convolution block-2/denseblock-2-6/conv2d/Conv2D 1 1 block-2/denseblock-2-6/Relu block-2/denseblock-2-6/conv2d/Conv2D 1 448 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-6/concat 2 1 block-2/denseblock-2-5/concat block-2/denseblock-2-6/conv2d/Conv2D block-2/denseblock-2-6/concat 1 ," +"Mul block-2/denseblock-2-7/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-6/concat block-2/denseblock-2-7/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-7/Relu 1 1 block-2/denseblock-2-7/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-7/Relu_output 1 ," +"ReLU block-2/denseblock-2-7/Relu_activation 1 1 block-2/denseblock-2-7/Relu_output block-2/denseblock-2-7/Relu ," +"Convolution block-2/denseblock-2-7/conv2d/Conv2D 1 1 block-2/denseblock-2-7/Relu block-2/denseblock-2-7/conv2d/Conv2D 1 480 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-7/concat 2 1 block-2/denseblock-2-6/concat block-2/denseblock-2-7/conv2d/Conv2D block-2/denseblock-2-7/concat 1 ," +"Mul block-2/denseblock-2-8/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-7/concat block-2/denseblock-2-8/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-8/Relu 1 1 block-2/denseblock-2-8/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-8/Relu_output 1 ," +"ReLU 
block-2/denseblock-2-8/Relu_activation 1 1 block-2/denseblock-2-8/Relu_output block-2/denseblock-2-8/Relu ," +"Convolution block-2/denseblock-2-8/conv2d/Conv2D 1 1 block-2/denseblock-2-8/Relu block-2/denseblock-2-8/conv2d/Conv2D 1 512 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-8/concat 2 1 block-2/denseblock-2-7/concat block-2/denseblock-2-8/conv2d/Conv2D block-2/denseblock-2-8/concat 1 ," +"Mul block-2/denseblock-2-9/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-8/concat block-2/denseblock-2-9/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-9/Relu 1 1 block-2/denseblock-2-9/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-9/Relu_output 1 ," +"ReLU block-2/denseblock-2-9/Relu_activation 1 1 block-2/denseblock-2-9/Relu_output block-2/denseblock-2-9/Relu ," +"Convolution block-2/denseblock-2-9/conv2d/Conv2D 1 1 block-2/denseblock-2-9/Relu block-2/denseblock-2-9/conv2d/Conv2D 1 544 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-9/concat 2 1 block-2/denseblock-2-8/concat block-2/denseblock-2-9/conv2d/Conv2D block-2/denseblock-2-9/concat 1 ," +"Mul block-2/denseblock-2-10/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-9/concat block-2/denseblock-2-10/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-10/Relu 1 1 block-2/denseblock-2-10/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-10/Relu_output 1 ," +"ReLU block-2/denseblock-2-10/Relu_activation 1 1 block-2/denseblock-2-10/Relu_output block-2/denseblock-2-10/Relu ," +"Convolution block-2/denseblock-2-10/conv2d/Conv2D 1 1 block-2/denseblock-2-10/Relu block-2/denseblock-2-10/conv2d/Conv2D 1 576 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-10/concat 2 1 block-2/denseblock-2-9/concat block-2/denseblock-2-10/conv2d/Conv2D block-2/denseblock-2-10/concat 1 ," +"Mul block-2/denseblock-2-11/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-10/concat 
block-2/denseblock-2-11/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-11/Relu 1 1 block-2/denseblock-2-11/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-11/Relu_output 1 ," +"ReLU block-2/denseblock-2-11/Relu_activation 1 1 block-2/denseblock-2-11/Relu_output block-2/denseblock-2-11/Relu ," +"Convolution block-2/denseblock-2-11/conv2d/Conv2D 1 1 block-2/denseblock-2-11/Relu block-2/denseblock-2-11/conv2d/Conv2D 1 608 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-11/concat 2 1 block-2/denseblock-2-10/concat block-2/denseblock-2-11/conv2d/Conv2D block-2/denseblock-2-11/concat 1 ," +"Mul block-2/denseblock-2-12/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-11/concat block-2/denseblock-2-12/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-12/Relu 1 1 block-2/denseblock-2-12/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-12/Relu_output 1 ," +"ReLU block-2/denseblock-2-12/Relu_activation 1 1 block-2/denseblock-2-12/Relu_output block-2/denseblock-2-12/Relu ," +"Convolution block-2/denseblock-2-12/conv2d/Conv2D 1 1 block-2/denseblock-2-12/Relu block-2/denseblock-2-12/conv2d/Conv2D 1 640 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-12/concat 2 1 block-2/denseblock-2-11/concat block-2/denseblock-2-12/conv2d/Conv2D block-2/denseblock-2-12/concat 1 ," +"Mul block-2/denseblock-2-13/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-12/concat block-2/denseblock-2-13/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-13/Relu 1 1 block-2/denseblock-2-13/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-13/Relu_output 1 ," +"ReLU block-2/denseblock-2-13/Relu_activation 1 1 block-2/denseblock-2-13/Relu_output block-2/denseblock-2-13/Relu ," +"Convolution block-2/denseblock-2-13/conv2d/Conv2D 1 1 block-2/denseblock-2-13/Relu block-2/denseblock-2-13/conv2d/Conv2D 1 672 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat 
block-2/denseblock-2-13/concat 2 1 block-2/denseblock-2-12/concat block-2/denseblock-2-13/conv2d/Conv2D block-2/denseblock-2-13/concat 1 ," +"Mul block-2/denseblock-2-14/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-13/concat block-2/denseblock-2-14/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-14/Relu 1 1 block-2/denseblock-2-14/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-14/Relu_output 1 ," +"ReLU block-2/denseblock-2-14/Relu_activation 1 1 block-2/denseblock-2-14/Relu_output block-2/denseblock-2-14/Relu ," +"Convolution block-2/denseblock-2-14/conv2d/Conv2D 1 1 block-2/denseblock-2-14/Relu block-2/denseblock-2-14/conv2d/Conv2D 1 704 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-14/concat 2 1 block-2/denseblock-2-13/concat block-2/denseblock-2-14/conv2d/Conv2D block-2/denseblock-2-14/concat 1 ," +"Mul block-2/denseblock-2-15/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-14/concat block-2/denseblock-2-15/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-15/Relu 1 1 block-2/denseblock-2-15/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-15/Relu_output 1 ," +"ReLU block-2/denseblock-2-15/Relu_activation 1 1 block-2/denseblock-2-15/Relu_output block-2/denseblock-2-15/Relu ," +"Convolution block-2/denseblock-2-15/conv2d/Conv2D 1 1 block-2/denseblock-2-15/Relu block-2/denseblock-2-15/conv2d/Conv2D 1 736 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-15/concat 2 1 block-2/denseblock-2-14/concat block-2/denseblock-2-15/conv2d/Conv2D block-2/denseblock-2-15/concat 1 ," +"Mul block-2/denseblock-2-16/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-15/concat block-2/denseblock-2-16/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-16/Relu 1 1 block-2/denseblock-2-16/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-16/Relu_output 1 ," +"ReLU block-2/denseblock-2-16/Relu_activation 
1 1 block-2/denseblock-2-16/Relu_output block-2/denseblock-2-16/Relu ," +"Convolution block-2/denseblock-2-16/conv2d/Conv2D 1 1 block-2/denseblock-2-16/Relu block-2/denseblock-2-16/conv2d/Conv2D 1 768 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-16/concat 2 1 block-2/denseblock-2-15/concat block-2/denseblock-2-16/conv2d/Conv2D block-2/denseblock-2-16/concat 1 ," +"Mul block-2/denseblock-2-17/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-16/concat block-2/denseblock-2-17/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-17/Relu 1 1 block-2/denseblock-2-17/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-17/Relu_output 1 ," +"ReLU block-2/denseblock-2-17/Relu_activation 1 1 block-2/denseblock-2-17/Relu_output block-2/denseblock-2-17/Relu ," +"Convolution block-2/denseblock-2-17/conv2d/Conv2D 1 1 block-2/denseblock-2-17/Relu block-2/denseblock-2-17/conv2d/Conv2D 1 800 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-17/concat 2 1 block-2/denseblock-2-16/concat block-2/denseblock-2-17/conv2d/Conv2D block-2/denseblock-2-17/concat 1 ," +"Mul block-2/denseblock-2-18/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-17/concat block-2/denseblock-2-18/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-18/Relu 1 1 block-2/denseblock-2-18/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-18/Relu_output 1 ," +"ReLU block-2/denseblock-2-18/Relu_activation 1 1 block-2/denseblock-2-18/Relu_output block-2/denseblock-2-18/Relu ," +"Convolution block-2/denseblock-2-18/conv2d/Conv2D 1 1 block-2/denseblock-2-18/Relu block-2/denseblock-2-18/conv2d/Conv2D 1 832 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-18/concat 2 1 block-2/denseblock-2-17/concat block-2/denseblock-2-18/conv2d/Conv2D block-2/denseblock-2-18/concat 1 ," +"Mul block-2/denseblock-2-19/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-18/concat 
block-2/denseblock-2-19/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-19/Relu 1 1 block-2/denseblock-2-19/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-19/Relu_output 1 ," +"ReLU block-2/denseblock-2-19/Relu_activation 1 1 block-2/denseblock-2-19/Relu_output block-2/denseblock-2-19/Relu ," +"Convolution block-2/denseblock-2-19/conv2d/Conv2D 1 1 block-2/denseblock-2-19/Relu block-2/denseblock-2-19/conv2d/Conv2D 1 864 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-19/concat 2 1 block-2/denseblock-2-18/concat block-2/denseblock-2-19/conv2d/Conv2D block-2/denseblock-2-19/concat 1 ," +"Mul block-2/denseblock-2-20/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-19/concat block-2/denseblock-2-20/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-20/Relu 1 1 block-2/denseblock-2-20/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-20/Relu_output 1 ," +"ReLU block-2/denseblock-2-20/Relu_activation 1 1 block-2/denseblock-2-20/Relu_output block-2/denseblock-2-20/Relu ," +"Convolution block-2/denseblock-2-20/conv2d/Conv2D 1 1 block-2/denseblock-2-20/Relu block-2/denseblock-2-20/conv2d/Conv2D 1 896 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-20/concat 2 1 block-2/denseblock-2-19/concat block-2/denseblock-2-20/conv2d/Conv2D block-2/denseblock-2-20/concat 1 ," +"Mul block-2/denseblock-2-21/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-20/concat block-2/denseblock-2-21/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-21/Relu 1 1 block-2/denseblock-2-21/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-21/Relu_output 1 ," +"ReLU block-2/denseblock-2-21/Relu_activation 1 1 block-2/denseblock-2-21/Relu_output block-2/denseblock-2-21/Relu ," +"Convolution block-2/denseblock-2-21/conv2d/Conv2D 1 1 block-2/denseblock-2-21/Relu block-2/denseblock-2-21/conv2d/Conv2D 1 928 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat 
block-2/denseblock-2-21/concat 2 1 block-2/denseblock-2-20/concat block-2/denseblock-2-21/conv2d/Conv2D block-2/denseblock-2-21/concat 1 ," +"Mul block-2/denseblock-2-22/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-21/concat block-2/denseblock-2-22/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-22/Relu 1 1 block-2/denseblock-2-22/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-22/Relu_output 1 ," +"ReLU block-2/denseblock-2-22/Relu_activation 1 1 block-2/denseblock-2-22/Relu_output block-2/denseblock-2-22/Relu ," +"Convolution block-2/denseblock-2-22/conv2d/Conv2D 1 1 block-2/denseblock-2-22/Relu block-2/denseblock-2-22/conv2d/Conv2D 1 960 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-22/concat 2 1 block-2/denseblock-2-21/concat block-2/denseblock-2-22/conv2d/Conv2D block-2/denseblock-2-22/concat 1 ," +"Mul block-2/denseblock-2-23/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-22/concat block-2/denseblock-2-23/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/denseblock-2-23/Relu 1 1 block-2/denseblock-2-23/batch_normalization/FusedBatchNorm_mul_0 block-2/denseblock-2-23/Relu_output 1 ," +"ReLU block-2/denseblock-2-23/Relu_activation 1 1 block-2/denseblock-2-23/Relu_output block-2/denseblock-2-23/Relu ," +"Convolution block-2/denseblock-2-23/conv2d/Conv2D 1 1 block-2/denseblock-2-23/Relu block-2/denseblock-2-23/conv2d/Conv2D 1 992 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-2/denseblock-2-23/concat 2 1 block-2/denseblock-2-22/concat block-2/denseblock-2-23/conv2d/Conv2D block-2/denseblock-2-23/concat 1 ," +"Mul block-2/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/denseblock-2-23/concat block-2/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-2/Relu 1 1 block-2/batch_normalization/FusedBatchNorm_mul_0 block-2/Relu_output 1 ," +"ReLU block-2/Relu_activation 1 1 block-2/Relu_output block-2/Relu ," +"Convolution block-2/conv2d/Conv2D 1 1 block-2/Relu 
block-2/conv2d/Conv2D 1 1024 512 1 1 1 1 0 0 1 0 1 1 0 ," +"Pooling block-2/average_pooling2d/AvgPool 1 1 block-2/conv2d/Conv2D block-2/average_pooling2d/AvgPool 1 2 2 2 2 0 0 -1 -1 0 1 ," +"Mul block-3/denseblock-3-0/batch_normalization/FusedBatchNorm_mul_0 1 1 block-2/average_pooling2d/AvgPool block-3/denseblock-3-0/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-0/Relu 1 1 block-3/denseblock-3-0/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-0/Relu_output 1 ," +"ReLU block-3/denseblock-3-0/Relu_activation 1 1 block-3/denseblock-3-0/Relu_output block-3/denseblock-3-0/Relu ," +"Convolution block-3/denseblock-3-0/conv2d/Conv2D 1 1 block-3/denseblock-3-0/Relu block-3/denseblock-3-0/conv2d/Conv2D 1 512 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-0/concat 2 1 block-2/average_pooling2d/AvgPool block-3/denseblock-3-0/conv2d/Conv2D block-3/denseblock-3-0/concat 1 ," +"Mul block-3/denseblock-3-1/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-0/concat block-3/denseblock-3-1/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-1/Relu 1 1 block-3/denseblock-3-1/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-1/Relu_output 1 ," +"ReLU block-3/denseblock-3-1/Relu_activation 1 1 block-3/denseblock-3-1/Relu_output block-3/denseblock-3-1/Relu ," +"Convolution block-3/denseblock-3-1/conv2d/Conv2D 1 1 block-3/denseblock-3-1/Relu block-3/denseblock-3-1/conv2d/Conv2D 1 544 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-1/concat 2 1 block-3/denseblock-3-0/concat block-3/denseblock-3-1/conv2d/Conv2D block-3/denseblock-3-1/concat 1 ," +"Mul block-3/denseblock-3-2/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-1/concat block-3/denseblock-3-2/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-2/Relu 1 1 block-3/denseblock-3-2/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-2/Relu_output 1 ," +"ReLU 
block-3/denseblock-3-2/Relu_activation 1 1 block-3/denseblock-3-2/Relu_output block-3/denseblock-3-2/Relu ," +"Convolution block-3/denseblock-3-2/conv2d/Conv2D 1 1 block-3/denseblock-3-2/Relu block-3/denseblock-3-2/conv2d/Conv2D 1 576 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-2/concat 2 1 block-3/denseblock-3-1/concat block-3/denseblock-3-2/conv2d/Conv2D block-3/denseblock-3-2/concat 1 ," +"Mul block-3/denseblock-3-3/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-2/concat block-3/denseblock-3-3/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-3/Relu 1 1 block-3/denseblock-3-3/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-3/Relu_output 1 ," +"ReLU block-3/denseblock-3-3/Relu_activation 1 1 block-3/denseblock-3-3/Relu_output block-3/denseblock-3-3/Relu ," +"Convolution block-3/denseblock-3-3/conv2d/Conv2D 1 1 block-3/denseblock-3-3/Relu block-3/denseblock-3-3/conv2d/Conv2D 1 608 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-3/concat 2 1 block-3/denseblock-3-2/concat block-3/denseblock-3-3/conv2d/Conv2D block-3/denseblock-3-3/concat 1 ," +"Mul block-3/denseblock-3-4/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-3/concat block-3/denseblock-3-4/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-4/Relu 1 1 block-3/denseblock-3-4/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-4/Relu_output 1 ," +"ReLU block-3/denseblock-3-4/Relu_activation 1 1 block-3/denseblock-3-4/Relu_output block-3/denseblock-3-4/Relu ," +"Convolution block-3/denseblock-3-4/conv2d/Conv2D 1 1 block-3/denseblock-3-4/Relu block-3/denseblock-3-4/conv2d/Conv2D 1 640 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-4/concat 2 1 block-3/denseblock-3-3/concat block-3/denseblock-3-4/conv2d/Conv2D block-3/denseblock-3-4/concat 1 ," +"Mul block-3/denseblock-3-5/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-4/concat 
block-3/denseblock-3-5/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-5/Relu 1 1 block-3/denseblock-3-5/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-5/Relu_output 1 ," +"ReLU block-3/denseblock-3-5/Relu_activation 1 1 block-3/denseblock-3-5/Relu_output block-3/denseblock-3-5/Relu ," +"Convolution block-3/denseblock-3-5/conv2d/Conv2D 1 1 block-3/denseblock-3-5/Relu block-3/denseblock-3-5/conv2d/Conv2D 1 672 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-5/concat 2 1 block-3/denseblock-3-4/concat block-3/denseblock-3-5/conv2d/Conv2D block-3/denseblock-3-5/concat 1 ," +"Mul block-3/denseblock-3-6/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-5/concat block-3/denseblock-3-6/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-6/Relu 1 1 block-3/denseblock-3-6/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-6/Relu_output 1 ," +"ReLU block-3/denseblock-3-6/Relu_activation 1 1 block-3/denseblock-3-6/Relu_output block-3/denseblock-3-6/Relu ," +"Convolution block-3/denseblock-3-6/conv2d/Conv2D 1 1 block-3/denseblock-3-6/Relu block-3/denseblock-3-6/conv2d/Conv2D 1 704 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-6/concat 2 1 block-3/denseblock-3-5/concat block-3/denseblock-3-6/conv2d/Conv2D block-3/denseblock-3-6/concat 1 ," +"Mul block-3/denseblock-3-7/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-6/concat block-3/denseblock-3-7/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-7/Relu 1 1 block-3/denseblock-3-7/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-7/Relu_output 1 ," +"ReLU block-3/denseblock-3-7/Relu_activation 1 1 block-3/denseblock-3-7/Relu_output block-3/denseblock-3-7/Relu ," +"Convolution block-3/denseblock-3-7/conv2d/Conv2D 1 1 block-3/denseblock-3-7/Relu block-3/denseblock-3-7/conv2d/Conv2D 1 736 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-7/concat 2 1 
block-3/denseblock-3-6/concat block-3/denseblock-3-7/conv2d/Conv2D block-3/denseblock-3-7/concat 1 ," +"Mul block-3/denseblock-3-8/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-7/concat block-3/denseblock-3-8/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-8/Relu 1 1 block-3/denseblock-3-8/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-8/Relu_output 1 ," +"ReLU block-3/denseblock-3-8/Relu_activation 1 1 block-3/denseblock-3-8/Relu_output block-3/denseblock-3-8/Relu ," +"Convolution block-3/denseblock-3-8/conv2d/Conv2D 1 1 block-3/denseblock-3-8/Relu block-3/denseblock-3-8/conv2d/Conv2D 1 768 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-8/concat 2 1 block-3/denseblock-3-7/concat block-3/denseblock-3-8/conv2d/Conv2D block-3/denseblock-3-8/concat 1 ," +"Mul block-3/denseblock-3-9/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-8/concat block-3/denseblock-3-9/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-9/Relu 1 1 block-3/denseblock-3-9/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-9/Relu_output 1 ," +"ReLU block-3/denseblock-3-9/Relu_activation 1 1 block-3/denseblock-3-9/Relu_output block-3/denseblock-3-9/Relu ," +"Convolution block-3/denseblock-3-9/conv2d/Conv2D 1 1 block-3/denseblock-3-9/Relu block-3/denseblock-3-9/conv2d/Conv2D 1 800 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-9/concat 2 1 block-3/denseblock-3-8/concat block-3/denseblock-3-9/conv2d/Conv2D block-3/denseblock-3-9/concat 1 ," +"Mul block-3/denseblock-3-10/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-9/concat block-3/denseblock-3-10/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-10/Relu 1 1 block-3/denseblock-3-10/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-10/Relu_output 1 ," +"ReLU block-3/denseblock-3-10/Relu_activation 1 1 block-3/denseblock-3-10/Relu_output block-3/denseblock-3-10/Relu 
," +"Convolution block-3/denseblock-3-10/conv2d/Conv2D 1 1 block-3/denseblock-3-10/Relu block-3/denseblock-3-10/conv2d/Conv2D 1 832 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-10/concat 2 1 block-3/denseblock-3-9/concat block-3/denseblock-3-10/conv2d/Conv2D block-3/denseblock-3-10/concat 1 ," +"Mul block-3/denseblock-3-11/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-10/concat block-3/denseblock-3-11/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-11/Relu 1 1 block-3/denseblock-3-11/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-11/Relu_output 1 ," +"ReLU block-3/denseblock-3-11/Relu_activation 1 1 block-3/denseblock-3-11/Relu_output block-3/denseblock-3-11/Relu ," +"Convolution block-3/denseblock-3-11/conv2d/Conv2D 1 1 block-3/denseblock-3-11/Relu block-3/denseblock-3-11/conv2d/Conv2D 1 864 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-11/concat 2 1 block-3/denseblock-3-10/concat block-3/denseblock-3-11/conv2d/Conv2D block-3/denseblock-3-11/concat 1 ," +"Mul block-3/denseblock-3-12/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-11/concat block-3/denseblock-3-12/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-12/Relu 1 1 block-3/denseblock-3-12/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-12/Relu_output 1 ," +"ReLU block-3/denseblock-3-12/Relu_activation 1 1 block-3/denseblock-3-12/Relu_output block-3/denseblock-3-12/Relu ," +"Convolution block-3/denseblock-3-12/conv2d/Conv2D 1 1 block-3/denseblock-3-12/Relu block-3/denseblock-3-12/conv2d/Conv2D 1 896 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-12/concat 2 1 block-3/denseblock-3-11/concat block-3/denseblock-3-12/conv2d/Conv2D block-3/denseblock-3-12/concat 1 ," +"Mul block-3/denseblock-3-13/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-12/concat block-3/denseblock-3-13/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add 
block-3/denseblock-3-13/Relu 1 1 block-3/denseblock-3-13/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-13/Relu_output 1 ," +"ReLU block-3/denseblock-3-13/Relu_activation 1 1 block-3/denseblock-3-13/Relu_output block-3/denseblock-3-13/Relu ," +"Convolution block-3/denseblock-3-13/conv2d/Conv2D 1 1 block-3/denseblock-3-13/Relu block-3/denseblock-3-13/conv2d/Conv2D 1 928 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-13/concat 2 1 block-3/denseblock-3-12/concat block-3/denseblock-3-13/conv2d/Conv2D block-3/denseblock-3-13/concat 1 ," +"Mul block-3/denseblock-3-14/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-13/concat block-3/denseblock-3-14/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-14/Relu 1 1 block-3/denseblock-3-14/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-14/Relu_output 1 ," +"ReLU block-3/denseblock-3-14/Relu_activation 1 1 block-3/denseblock-3-14/Relu_output block-3/denseblock-3-14/Relu ," +"Convolution block-3/denseblock-3-14/conv2d/Conv2D 1 1 block-3/denseblock-3-14/Relu block-3/denseblock-3-14/conv2d/Conv2D 1 960 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-14/concat 2 1 block-3/denseblock-3-13/concat block-3/denseblock-3-14/conv2d/Conv2D block-3/denseblock-3-14/concat 1 ," +"Mul block-3/denseblock-3-15/batch_normalization/FusedBatchNorm_mul_0 1 1 block-3/denseblock-3-14/concat block-3/denseblock-3-15/batch_normalization/FusedBatchNorm_mul_0 1 ," +"Add block-3/denseblock-3-15/Relu 1 1 block-3/denseblock-3-15/batch_normalization/FusedBatchNorm_mul_0 block-3/denseblock-3-15/Relu_output 1 ," +"ReLU block-3/denseblock-3-15/Relu_activation 1 1 block-3/denseblock-3-15/Relu_output block-3/denseblock-3-15/Relu ," +"Convolution block-3/denseblock-3-15/conv2d/Conv2D 1 1 block-3/denseblock-3-15/Relu block-3/denseblock-3-15/conv2d/Conv2D 1 992 32 3 3 1 1 0 0 1 0 1 1 0 ," +"Concat block-3/denseblock-3-15/concat 2 1 block-3/denseblock-3-14/concat 
block-3/denseblock-3-15/conv2d/Conv2D block-3/denseblock-3-15/concat 1 ," +"Pooling global_pool 1 1 block-3/denseblock-3-15/concat global_pool 1 0 0 1 1 0 0 -1 -1 -1 0 ," +"InnerProduct dense/BiasAdd 1 1 global_pool dense/BiasAdd 1001 1 1 1 ," +"Softmax softmax_tensor 1 1 dense/BiasAdd softmax_tensor 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/inception_v3.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/inception_v3.tnnproto new file mode 100644 index 0000000..d39a3fa --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/inception_v3.tnnproto @@ -0,0 +1,225 @@ +"1 0 1 4206624770 ," +"input 1 3 299 299 ," +" ," +"InceptionV3/Predictions/Reshape_1 ," +" 220 ," +"Convolution InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu 1 1 input InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu_output 1 3 32 3 3 2 2 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu_output InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu 1 1 InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu_output 1 32 32 3 3 1 1 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu_output InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu 1 1 InceptionV3/InceptionV3/Conv2d_2a_3x3/Relu InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu_output 1 32 64 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu_output InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/MaxPool_3a_3x3/MaxPool 1 1 InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu InceptionV3/InceptionV3/MaxPool_3a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Convolution InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu 1 1 InceptionV3/InceptionV3/MaxPool_3a_3x3/MaxPool 
InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu_output 1 64 80 1 1 1 1 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu_output InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu 1 1 InceptionV3/InceptionV3/Conv2d_3b_1x1/Relu InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu_output 1 80 192 3 3 1 1 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu_output InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool 1 1 InceptionV3/InceptionV3/Conv2d_4a_3x3/Relu InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu_output 1 192 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu_output 1 192 48 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu_output 1 48 64 5 5 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu_activation 1 1 
InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu_output 1 192 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu_output 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu_output 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu_output 1 192 32 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU 
InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_5b/concat 4 1 InceptionV3/InceptionV3/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu InceptionV3/InceptionV3/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_5b/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/concat InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu_output 1 256 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/concat InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu_output 1 256 48 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu_output 1 48 64 5 5 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5b/concat 
InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu_output 1 256 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu_output 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu_output 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_5b/concat InceptionV3/InceptionV3/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu_output 1 256 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_5c/concat 4 1 
InceptionV3/InceptionV3/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_1/Conv_1_0c_5x5/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu InceptionV3/InceptionV3/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_5c/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/concat InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu_output 1 288 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/concat InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu_output 1 288 48 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu_output 1 48 64 5 5 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5c/concat InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu_output 1 288 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu_output 
InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu_output 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu_output 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_5c/concat InceptionV3/InceptionV3/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu_output 1 288 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_5d/concat 4 1 InceptionV3/InceptionV3/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_1/Conv2d_0b_5x5/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu InceptionV3/InceptionV3/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_5d/concat 1 ," 
+"Convolution InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/concat InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu_output 1 288 384 3 3 2 2 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_5d/concat InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu_output 1 288 64 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu_output 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu_output 1 96 96 3 3 2 2 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool 1 1 InceptionV3/InceptionV3/Mixed_5d/concat InceptionV3/InceptionV3/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," 
+"Concat InceptionV3/InceptionV3/Mixed_6a/concat 3 1 InceptionV3/InceptionV3/Mixed_6a/Branch_0/Conv2d_1a_1x1/Relu InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_1a_1x1/Relu InceptionV3/InceptionV3/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_6a/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6a/concat InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6a/concat InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu_output 1 768 128 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu_output 1 128 128 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu_output 1 128 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu_activation 1 1 
InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6a/concat InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu_output 1 768 128 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu_output 1 128 128 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu_output 1 128 128 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu_output 1 128 128 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu ," +"Convolution 
InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu_output 1 128 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_6a/concat InceptionV3/InceptionV3/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_6b/concat 4 1 InceptionV3/InceptionV3/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu InceptionV3/InceptionV3/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_6b/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/concat InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu 1 1 
InceptionV3/InceptionV3/Mixed_6b/concat InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu_output 1 768 160 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu_output 1 160 160 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu_output 1 160 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6b/concat InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu_output 1 768 160 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu_output 1 160 160 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU 
InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu_output 1 160 160 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu_output 1 160 160 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu_output 1 160 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_6b/concat InceptionV3/InceptionV3/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool 
InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_6c/concat 4 1 InceptionV3/InceptionV3/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu InceptionV3/InceptionV3/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_6c/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/concat InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/concat InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu_output 1 768 160 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu_output 1 160 160 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu ," 
+"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu_output 1 160 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6c/concat InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu_output 1 768 160 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu_output 1 160 160 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu_output 1 160 160 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu 
InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu_output 1 160 160 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu_output 1 160 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_6c/concat InceptionV3/InceptionV3/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_6d/concat 4 1 InceptionV3/InceptionV3/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu InceptionV3/InceptionV3/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_6d/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/concat InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 
1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/concat InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu_output 1 192 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu_output 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6d/concat InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu_output 
InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu_output 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu_output 1 192 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu_output 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu_output 1 192 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 
InceptionV3/InceptionV3/Mixed_6d/concat InceptionV3/InceptionV3/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_6e/concat 4 1 InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu InceptionV3/InceptionV3/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_6e/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/concat InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu_output 1 192 320 3 3 2 2 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_6e/concat 
InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu_output 1 768 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu_output 1 192 192 1 7 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu_output 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu_output 1 192 192 3 3 2 2 0 0 1 -1 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu_output InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu ," +"Pooling InceptionV3/InceptionV3/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool 1 1 InceptionV3/InceptionV3/Mixed_6e/concat InceptionV3/InceptionV3/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Concat InceptionV3/InceptionV3/Mixed_7a/concat 3 1 
InceptionV3/InceptionV3/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu InceptionV3/InceptionV3/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool InceptionV3/InceptionV3/Mixed_7a/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/concat InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu_output 1 1280 320 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/concat InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu_output 1 1280 384 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu_output 1 384 384 1 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu_output 1 384 384 3 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu_output 
InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7b/Branch_1/concat 2 1 InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_1/Conv2d_0b_3x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_1/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7a/concat InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu_output 1 1280 448 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu_output 1 448 384 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu_output 1 384 384 1 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu_output 1 384 384 3 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu_activation 1 1 
InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7b/Branch_2/concat 2 1 InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_2/Conv2d_0d_3x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_2/concat 1 ," +"Pooling InceptionV3/InceptionV3/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_7a/concat InceptionV3/InceptionV3/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu_output 1 1280 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7b/concat 4 1 InceptionV3/InceptionV3/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7b/Branch_1/concat InceptionV3/InceptionV3/Mixed_7b/Branch_2/concat InceptionV3/InceptionV3/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_7b/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/concat InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu_output 1 2048 320 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/concat InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu_output 1 2048 384 1 
1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu_output 1 384 384 1 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu_output 1 384 384 3 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7c/Branch_1/concat 2 1 InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_1/concat 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7b/concat InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu_output 1 2048 448 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu 
InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu_output 1 448 384 3 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu_output 1 384 384 1 3 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0b_3x3/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu_output 1 384 384 3 1 1 1 0 0 1 0 1 1 1 ," +"ReLU InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7c/Branch_2/concat 2 1 InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_2/Conv2d_0d_3x1/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_2/concat 1 ," +"Pooling InceptionV3/InceptionV3/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_7b/concat InceptionV3/InceptionV3/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu_output 1 2048 192 1 1 1 1 0 0 1 0 1 1 1 ," +"ReLU 
InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu_activation 1 1 InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu_output InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu ," +"Concat InceptionV3/InceptionV3/Mixed_7c/concat 4 1 InceptionV3/InceptionV3/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu InceptionV3/InceptionV3/Mixed_7c/Branch_1/concat InceptionV3/InceptionV3/Mixed_7c/Branch_2/concat InceptionV3/InceptionV3/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu InceptionV3/InceptionV3/Mixed_7c/concat 1 ," +"Pooling InceptionV3/Logits/AvgPool_1a_8x8/AvgPool 1 1 InceptionV3/InceptionV3/Mixed_7c/concat InceptionV3/Logits/AvgPool_1a_8x8/AvgPool 1 8 8 2 2 0 0 -1 -1 1 1 ," +"Convolution InceptionV3/Logits/Conv2d_1c_1x1/BiasAdd 1 1 InceptionV3/Logits/AvgPool_1a_8x8/AvgPool InceptionV3/Logits/Conv2d_1c_1x1/BiasAdd 1 2048 1001 1 1 1 1 0 0 1 0 1 1 0 ," +"Reshape InceptionV3/Logits/SpatialSqueeze 1 1 InceptionV3/Logits/Conv2d_1c_1x1/BiasAdd InceptionV3/Logits/SpatialSqueeze 0 4 4 0 1001 1 1 0 ," +"Softmax InceptionV3/Predictions/Reshape_1 1 1 InceptionV3/Logits/SpatialSqueeze InceptionV3/Predictions/Reshape_1 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/inception_v4.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/inception_v4.tnnproto new file mode 100644 index 0000000..4b25699 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/inception_v4.tnnproto @@ -0,0 +1,200 @@ +"1 0 1 4206624770 ," +"input 1 3 299 299 ," +" ," +"InceptionV4/Logits/Predictions ," +" 195 ," +"Convolution InceptionV4/InceptionV4/Conv2d_1a_3x3/Relu 1 1 input InceptionV4/InceptionV4/Conv2d_1a_3x3/Relu 1 3 32 3 3 2 2 0 0 1 -1 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Conv2d_2a_3x3/Relu 1 1 InceptionV4/InceptionV4/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Conv2d_2a_3x3/Relu 1 32 32 3 3 1 1 0 0 1 -1 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu 1 1 InceptionV4/InceptionV4/Conv2d_2a_3x3/Relu InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu 1 32 64 3 3 1 1 
0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool 1 1 InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu InceptionV4/InceptionV4/Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu 1 1 InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu InceptionV4/InceptionV4/Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu 1 64 96 3 3 2 2 0 0 1 -1 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_3a/concat 2 1 InceptionV4/InceptionV4/Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool InceptionV4/InceptionV4/Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu InceptionV4/InceptionV4/Mixed_3a/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_3a/concat InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu 1 160 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu 1 64 96 3 3 1 1 0 0 1 -1 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_3a/concat InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu 1 160 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu 1 64 64 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu 1 64 64 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu 
InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu 1 64 96 3 3 1 1 0 0 1 -1 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_4a/concat 2 1 InceptionV4/InceptionV4/Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_4a/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_4a/concat InceptionV4/InceptionV4/Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu 1 192 192 3 3 2 2 0 0 1 -1 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool 1 1 InceptionV4/InceptionV4/Mixed_4a/concat InceptionV4/InceptionV4/Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_5a/concat 2 1 InceptionV4/InceptionV4/Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool InceptionV4/InceptionV4/Mixed_5a/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5a/concat InceptionV4/InceptionV4/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5a/concat InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5a/concat InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu 
InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_5a/concat InceptionV4/InceptionV4/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_5b/concat 4 1 InceptionV4/InceptionV4/Mixed_5b/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5b/Branch_1/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu InceptionV4/InceptionV4/Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_5b/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/concat InceptionV4/InceptionV4/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/concat InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5b/concat InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," 
+"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_5b/concat InceptionV4/InceptionV4/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_5c/concat 4 1 InceptionV4/InceptionV4/Mixed_5c/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_1/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_2/Conv2d_0c_3x3/Relu InceptionV4/InceptionV4/Mixed_5c/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_5c/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/concat InceptionV4/InceptionV4/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5c/concat InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu 1 
1 InceptionV4/InceptionV4/Mixed_5c/concat InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_5c/concat InceptionV4/InceptionV4/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_5d/concat 4 1 InceptionV4/InceptionV4/Mixed_5d/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_1/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_2/Conv2d_0c_3x3/Relu InceptionV4/InceptionV4/Mixed_5d/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_5d/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/concat InceptionV4/InceptionV4/Mixed_5e/Branch_0/Conv2d_0a_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/concat InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0a_1x1/Relu 
InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5d/concat InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0a_1x1/Relu 1 384 64 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0b_3x3/Relu 1 64 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0c_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0c_3x3/Relu 1 96 96 3 3 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_5e/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_5d/concat InceptionV4/InceptionV4/Mixed_5e/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_5e/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_5e/Branch_3/Conv2d_0b_1x1/Relu 1 384 96 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_5e/concat 4 1 InceptionV4/InceptionV4/Mixed_5e/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_5e/Branch_1/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_5e/Branch_2/Conv2d_0c_3x3/Relu InceptionV4/InceptionV4/Mixed_5e/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_5e/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/concat InceptionV4/InceptionV4/Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu 1 384 384 3 3 2 2 0 0 1 -1 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_5e/concat InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu 1 384 192 1 1 1 1 0 0 1 0 1 1 1 ," 
+"Convolution InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu 1 192 224 3 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu 1 224 256 3 3 2 2 0 0 1 -1 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool 1 1 InceptionV4/InceptionV4/Mixed_5e/concat InceptionV4/InceptionV4/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6a/concat 3 1 InceptionV4/InceptionV4/Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool InceptionV4/InceptionV4/Mixed_6a/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6a/concat InceptionV4/InceptionV4/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6a/concat InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6a/concat 
InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6a/concat InceptionV4/InceptionV4/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6b/concat 4 1 InceptionV4/InceptionV4/Mixed_6b/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6b/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6b/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/concat 
InceptionV4/InceptionV4/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/concat InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6b/concat InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling 
InceptionV4/InceptionV4/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6b/concat InceptionV4/InceptionV4/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6c/concat 4 1 InceptionV4/InceptionV4/Mixed_6c/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6c/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6c/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/concat InceptionV4/InceptionV4/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/concat InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6c/concat InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu 1 1 
InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6c/concat InceptionV4/InceptionV4/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6d/concat 4 1 InceptionV4/InceptionV4/Mixed_6d/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6d/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6d/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/concat InceptionV4/InceptionV4/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/concat 
InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6d/concat InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6d/concat InceptionV4/InceptionV4/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution 
InceptionV4/InceptionV4/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6e/concat 4 1 InceptionV4/InceptionV4/Mixed_6e/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6e/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6e/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/concat InceptionV4/InceptionV4/Mixed_6f/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/concat InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6e/concat InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0c_1x7/Relu 
1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6f/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6e/concat InceptionV4/InceptionV4/Mixed_6f/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6f/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6f/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6f/concat 4 1 InceptionV4/InceptionV4/Mixed_6f/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6f/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6f/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/concat InceptionV4/InceptionV4/Mixed_6g/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/concat InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0a_1x1/Relu 
InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6f/concat InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0d_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6g/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6f/concat InceptionV4/InceptionV4/Mixed_6g/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6g/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6g/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6g/concat 4 1 
InceptionV4/InceptionV4/Mixed_6g/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6g/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6g/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/concat InceptionV4/InceptionV4/Mixed_6h/Branch_0/Conv2d_0a_1x1/Relu 1 1024 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/concat InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0b_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0c_7x1/Relu 1 224 256 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6g/concat InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0b_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0b_7x1/Relu 1 192 192 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0c_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0b_7x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0c_1x7/Relu 1 192 224 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0d_7x1/Relu 1 1 
InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0c_1x7/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0d_7x1/Relu 1 224 224 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0e_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0d_7x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0e_1x7/Relu 1 224 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_6h/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_6g/concat InceptionV4/InceptionV4/Mixed_6h/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_6h/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_6h/Branch_3/Conv2d_0b_1x1/Relu 1 1024 128 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_6h/concat 4 1 InceptionV4/InceptionV4/Mixed_6h/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_2/Conv2d_0e_1x7/Relu InceptionV4/InceptionV4/Mixed_6h/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_6h/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/concat InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu 1 1024 192 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu 1 192 192 3 3 2 2 0 0 1 -1 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_6h/concat InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu 1 1024 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu 
InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu 1 256 256 1 7 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu 1 256 320 7 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu 1 320 320 3 3 2 2 0 0 1 -1 1 1 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool 1 1 InceptionV4/InceptionV4/Mixed_6h/concat InceptionV4/InceptionV4/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool 0 3 3 2 2 0 0 -1 -1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7a/concat 3 1 InceptionV4/InceptionV4/Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu InceptionV4/InceptionV4/Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool InceptionV4/InceptionV4/Mixed_7a/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/concat InceptionV4/InceptionV4/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/concat InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu 1 384 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0c_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0c_3x1/Relu 1 384 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat 
InceptionV4/InceptionV4/Mixed_7b/Branch_1/concat 2 1 InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0b_1x3/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_1/Conv2d_0c_3x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_1/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7a/concat InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0b_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0b_3x1/Relu 1 384 448 3 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0b_3x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu 1 448 512 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0d_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0d_1x3/Relu 1 512 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0e_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0c_1x3/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0e_3x1/Relu 1 512 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7b/Branch_2/concat 2 1 InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0d_1x3/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/Conv2d_0e_3x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_2/concat 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_7a/concat InceptionV4/InceptionV4/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/Branch_3/AvgPool_0a_3x3/AvgPool 
InceptionV4/InceptionV4/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7b/concat 4 1 InceptionV4/InceptionV4/Mixed_7b/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7b/Branch_1/concat InceptionV4/InceptionV4/Mixed_7b/Branch_2/concat InceptionV4/InceptionV4/Mixed_7b/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_7b/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/concat InceptionV4/InceptionV4/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/concat InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu 1 384 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu 1 384 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7c/Branch_1/concat 2 1 InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0b_1x3/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_1/Conv2d_0c_3x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_1/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7b/concat InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0b_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0b_3x1/Relu 1 384 448 3 1 1 1 0 0 1 0 
1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0b_3x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu 1 448 512 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0d_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0d_1x3/Relu 1 512 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0e_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0c_1x3/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0e_3x1/Relu 1 512 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7c/Branch_2/concat 2 1 InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0d_1x3/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/Conv2d_0e_3x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_2/concat 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_7b/concat InceptionV4/InceptionV4/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7c/concat 4 1 InceptionV4/InceptionV4/Mixed_7c/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7c/Branch_1/concat InceptionV4/InceptionV4/Mixed_7c/Branch_2/concat InceptionV4/InceptionV4/Mixed_7c/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_7c/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_0/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/concat InceptionV4/InceptionV4/Mixed_7d/Branch_0/Conv2d_0a_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution 
InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/concat InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0b_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0b_1x3/Relu 1 384 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0c_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0c_3x1/Relu 1 384 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7d/Branch_1/concat 2 1 InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0b_1x3/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_1/Conv2d_0c_3x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_1/concat 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0a_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7c/concat InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0a_1x1/Relu 1 1536 384 1 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0b_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0b_3x1/Relu 1 384 448 3 1 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0c_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0b_3x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0c_1x3/Relu 1 448 512 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0d_1x3/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0c_1x3/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0d_1x3/Relu 1 512 256 1 3 1 1 0 0 1 0 1 1 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0e_3x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0c_1x3/Relu 
InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0e_3x1/Relu 1 512 256 3 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7d/Branch_2/concat 2 1 InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0d_1x3/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_2/Conv2d_0e_3x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_2/concat 1 ," +"Pooling InceptionV4/InceptionV4/Mixed_7d/Branch_3/AvgPool_0a_3x3/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_7c/concat InceptionV4/InceptionV4/Mixed_7d/Branch_3/AvgPool_0a_3x3/AvgPool 1 3 3 1 1 0 0 -1 -1 0 1 ," +"Convolution InceptionV4/InceptionV4/Mixed_7d/Branch_3/Conv2d_0b_1x1/Relu 1 1 InceptionV4/InceptionV4/Mixed_7d/Branch_3/AvgPool_0a_3x3/AvgPool InceptionV4/InceptionV4/Mixed_7d/Branch_3/Conv2d_0b_1x1/Relu 1 1536 256 1 1 1 1 0 0 1 0 1 1 1 ," +"Concat InceptionV4/InceptionV4/Mixed_7d/concat 4 1 InceptionV4/InceptionV4/Mixed_7d/Branch_0/Conv2d_0a_1x1/Relu InceptionV4/InceptionV4/Mixed_7d/Branch_1/concat InceptionV4/InceptionV4/Mixed_7d/Branch_2/concat InceptionV4/InceptionV4/Mixed_7d/Branch_3/Conv2d_0b_1x1/Relu InceptionV4/InceptionV4/Mixed_7d/concat 1 ," +"Pooling InceptionV4/Logits/AvgPool_1a/AvgPool 1 1 InceptionV4/InceptionV4/Mixed_7d/concat InceptionV4/Logits/AvgPool_1a/AvgPool 1 8 8 1 1 0 0 -1 -1 1 1 ," +"InnerProduct InceptionV4/Logits/Logits/BiasAdd 1 1 InceptionV4/Logits/AvgPool_1a/AvgPool InceptionV4/Logits/Logits/BiasAdd 1001 1 1 1 ," +"Softmax InceptionV4/Logits/Predictions 1 1 InceptionV4/Logits/Logits/BiasAdd InceptionV4/Logits/Predictions 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v1.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v1.tnnproto new file mode 100644 index 0000000..98ed73c --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v1.tnnproto @@ -0,0 +1,62 @@ +"1 58 1 4206624770 ," +"input 1 3 224 224 ," +" conv1 conv2_1/dw conv2_1/sep conv2_2/dw conv2_2/sep conv3_1/dw conv3_1/sep conv3_2/dw conv3_2/sep conv4_1/dw conv4_1/sep conv4_2/dw conv4_2/sep conv5_1/dw 
conv5_1/sep conv5_2/dw conv5_2/sep conv5_3/dw conv5_3/sep conv5_4/dw conv5_4/sep conv5_5/dw conv5_5/sep conv5_6/dw conv5_6/sep conv6/dw conv6/sep fc7 input pool6 prob relu1 relu2_1/dw relu2_1/sep relu2_2/dw relu2_2/sep relu3_1/dw relu3_1/sep relu3_2/dw relu3_2/sep relu4_1/dw relu4_1/sep relu4_2/dw relu4_2/sep relu5_1/dw relu5_1/sep relu5_2/dw relu5_2/sep relu5_3/dw relu5_3/sep relu5_4/dw relu5_4/sep relu5_5/dw relu5_5/sep relu5_6/dw relu5_6/sep relu6/dw relu6/sep ," +"prob ," +" 57 ," +"Convolution conv1 1 1 input conv1 1 3 32 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu1 1 1 conv1 relu1 ," +"Convolution conv2_1/dw 1 1 relu1 conv2_1/dw 32 1 32 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu2_1/dw 1 1 conv2_1/dw relu2_1/dw ," +"Convolution conv2_1/sep 1 1 relu2_1/dw conv2_1/sep 1 32 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu2_1/sep 1 1 conv2_1/sep relu2_1/sep ," +"Convolution conv2_2/dw 1 1 relu2_1/sep conv2_2/dw 64 1 64 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu2_2/dw 1 1 conv2_2/dw relu2_2/dw ," +"Convolution conv2_2/sep 1 1 relu2_2/dw conv2_2/sep 1 64 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu2_2/sep 1 1 conv2_2/sep relu2_2/sep ," +"Convolution conv3_1/dw 1 1 relu2_2/sep conv3_1/dw 128 1 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu3_1/dw 1 1 conv3_1/dw relu3_1/dw ," +"Convolution conv3_1/sep 1 1 relu3_1/dw conv3_1/sep 1 128 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu3_1/sep 1 1 conv3_1/sep relu3_1/sep ," +"Convolution conv3_2/dw 1 1 relu3_1/sep conv3_2/dw 128 1 128 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu3_2/dw 1 1 conv3_2/dw relu3_2/dw ," +"Convolution conv3_2/sep 1 1 relu3_2/dw conv3_2/sep 1 128 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu3_2/sep 1 1 conv3_2/sep relu3_2/sep ," +"Convolution conv4_1/dw 1 1 relu3_2/sep conv4_1/dw 256 1 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_1/dw 1 1 conv4_1/dw relu4_1/dw ," +"Convolution conv4_1/sep 1 1 relu4_1/dw conv4_1/sep 1 256 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_1/sep 1 1 conv4_1/sep relu4_1/sep ," +"Convolution conv4_2/dw 1 1 relu4_1/sep conv4_2/dw 256 1 256 3 3 2 2 1 1 
1 -1 1 1 ," +"ReLU relu4_2/dw 1 1 conv4_2/dw relu4_2/dw ," +"Convolution conv4_2/sep 1 1 relu4_2/dw conv4_2/sep 1 256 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_2/sep 1 1 conv4_2/sep relu4_2/sep ," +"Convolution conv5_1/dw 1 1 relu4_2/sep conv5_1/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_1/dw 1 1 conv5_1/dw relu5_1/dw ," +"Convolution conv5_1/sep 1 1 relu5_1/dw conv5_1/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_1/sep 1 1 conv5_1/sep relu5_1/sep ," +"Convolution conv5_2/dw 1 1 relu5_1/sep conv5_2/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_2/dw 1 1 conv5_2/dw relu5_2/dw ," +"Convolution conv5_2/sep 1 1 relu5_2/dw conv5_2/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_2/sep 1 1 conv5_2/sep relu5_2/sep ," +"Convolution conv5_3/dw 1 1 relu5_2/sep conv5_3/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_3/dw 1 1 conv5_3/dw relu5_3/dw ," +"Convolution conv5_3/sep 1 1 relu5_3/dw conv5_3/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_3/sep 1 1 conv5_3/sep relu5_3/sep ," +"Convolution conv5_4/dw 1 1 relu5_3/sep conv5_4/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_4/dw 1 1 conv5_4/dw relu5_4/dw ," +"Convolution conv5_4/sep 1 1 relu5_4/dw conv5_4/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_4/sep 1 1 conv5_4/sep relu5_4/sep ," +"Convolution conv5_5/dw 1 1 relu5_4/sep conv5_5/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_5/dw 1 1 conv5_5/dw relu5_5/dw ," +"Convolution conv5_5/sep 1 1 relu5_5/dw conv5_5/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_5/sep 1 1 conv5_5/sep relu5_5/sep ," +"Convolution conv5_6/dw 1 1 relu5_5/sep conv5_6/dw 512 1 512 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu5_6/dw 1 1 conv5_6/dw relu5_6/dw ," +"Convolution conv5_6/sep 1 1 relu5_6/dw conv5_6/sep 1 512 1024 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_6/sep 1 1 conv5_6/sep relu5_6/sep ," +"Convolution conv6/dw 1 1 relu5_6/sep conv6/dw 1024 1 1024 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu6/dw 1 1 conv6/dw relu6/dw ," +"Convolution conv6/sep 1 1 relu6/dw conv6/sep 1 1024 
1024 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu6/sep 1 1 conv6/sep relu6/sep ," +"Pooling pool6 1 1 relu6/sep pool6 1 0 0 1 1 0 0 -1 -1 -1 0 ," +"Convolution fc7 1 1 pool6 fc7 1 1024 1000 1 1 1 1 0 0 1 -1 1 1 ," +"SoftmaxCaffe prob 1 1 fc7 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v2.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v2.tnnproto new file mode 100644 index 0000000..8dd6c7d --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/mobilenet_v2.tnnproto @@ -0,0 +1,107 @@ +"1 103 1 4206624770 ," +"input 1 3 224 224 ," +" block_3_1 block_4_1 block_4_2 block_4_4 block_4_5 block_4_6 block_5_1 block_5_2 block_6_1 block_6_2 conv1/bn conv2_1/dwise/bn conv2_1/expand/bn conv2_1/linear/bn conv2_2/dwise/bn conv2_2/expand/bn conv2_2/linear/bn conv3_1/dwise/bn conv3_1/expand/bn conv3_1/linear/bn conv3_2/dwise/bn conv3_2/expand/bn conv3_2/linear/bn conv4_1/dwise/bn conv4_1/expand/bn conv4_1/linear/bn conv4_2/dwise/bn conv4_2/expand/bn conv4_2/linear/bn conv4_3/dwise/bn conv4_3/expand/bn conv4_3/linear/bn conv4_4/dwise/bn conv4_4/expand/bn conv4_4/linear/bn conv4_5/dwise/bn conv4_5/expand/bn conv4_5/linear/bn conv4_6/dwise/bn conv4_6/expand/bn conv4_6/linear/bn conv4_7/dwise/bn conv4_7/expand/bn conv4_7/linear/bn conv5_1/dwise/bn conv5_1/expand/bn conv5_1/linear/bn conv5_2/dwise/bn conv5_2/expand/bn conv5_2/linear/bn conv5_3/dwise/bn conv5_3/expand/bn conv5_3/linear/bn conv6_1/dwise/bn conv6_1/expand/bn conv6_1/linear/bn conv6_2/dwise/bn conv6_2/expand/bn conv6_2/linear/bn conv6_3/dwise/bn conv6_3/expand/bn conv6_3/linear/bn conv6_4/bn fc7 input pool6 prob relu1 relu2_1/dwise relu2_1/expand relu2_2/dwise relu2_2/expand relu3_1/dwise relu3_1/expand relu3_2/dwise relu3_2/expand relu4_1/dwise relu4_1/expand relu4_2/dwise relu4_2/expand relu4_3/dwise relu4_3/expand relu4_4/dwise relu4_4/expand relu4_5/dwise relu4_5/expand relu4_6/dwise relu4_6/expand relu4_7/dwise relu4_7/expand relu5_1/dwise relu5_1/expand relu5_2/dwise relu5_2/expand 
relu5_3/dwise relu5_3/expand relu6_1/dwise relu6_1/expand relu6_2/dwise relu6_2/expand relu6_3/dwise relu6_3/expand relu6_4 ," +"prob ," +" 102 ," +"Convolution conv1 1 1 input conv1/bn 1 3 32 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu1 1 1 conv1/bn relu1 ," +"Convolution conv2_1/expand 1 1 relu1 conv2_1/expand/bn 1 32 32 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu2_1/expand 1 1 conv2_1/expand/bn relu2_1/expand ," +"Convolution conv2_1/dwise 1 1 relu2_1/expand conv2_1/dwise/bn 32 1 32 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu2_1/dwise 1 1 conv2_1/dwise/bn relu2_1/dwise ," +"Convolution conv2_1/linear 1 1 relu2_1/dwise conv2_1/linear/bn 1 32 16 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv2_2/expand 1 1 conv2_1/linear/bn conv2_2/expand/bn 1 16 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu2_2/expand 1 1 conv2_2/expand/bn relu2_2/expand ," +"Convolution conv2_2/dwise 1 1 relu2_2/expand conv2_2/dwise/bn 96 1 96 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu2_2/dwise 1 1 conv2_2/dwise/bn relu2_2/dwise ," +"Convolution conv2_2/linear 1 1 relu2_2/dwise conv2_2/linear/bn 1 96 24 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv3_1/expand 1 1 conv2_2/linear/bn conv3_1/expand/bn 1 24 144 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu3_1/expand 1 1 conv3_1/expand/bn relu3_1/expand ," +"Convolution conv3_1/dwise 1 1 relu3_1/expand conv3_1/dwise/bn 144 1 144 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu3_1/dwise 1 1 conv3_1/dwise/bn relu3_1/dwise ," +"Convolution conv3_1/linear 1 1 relu3_1/dwise conv3_1/linear/bn 1 144 24 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_3_1 2 1 conv2_2/linear/bn conv3_1/linear/bn block_3_1 ," +"Convolution conv3_2/expand 1 1 block_3_1 conv3_2/expand/bn 1 24 144 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu3_2/expand 1 1 conv3_2/expand/bn relu3_2/expand ," +"Convolution conv3_2/dwise 1 1 relu3_2/expand conv3_2/dwise/bn 144 1 144 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu3_2/dwise 1 1 conv3_2/dwise/bn relu3_2/dwise ," +"Convolution conv3_2/linear 1 1 relu3_2/dwise conv3_2/linear/bn 1 144 32 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv4_1/expand 1 
1 conv3_2/linear/bn conv4_1/expand/bn 1 32 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_1/expand 1 1 conv4_1/expand/bn relu4_1/expand ," +"Convolution conv4_1/dwise 1 1 relu4_1/expand conv4_1/dwise/bn 192 1 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_1/dwise 1 1 conv4_1/dwise/bn relu4_1/dwise ," +"Convolution conv4_1/linear 1 1 relu4_1/dwise conv4_1/linear/bn 1 192 32 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_4_1 2 1 conv3_2/linear/bn conv4_1/linear/bn block_4_1 ," +"Convolution conv4_2/expand 1 1 block_4_1 conv4_2/expand/bn 1 32 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_2/expand 1 1 conv4_2/expand/bn relu4_2/expand ," +"Convolution conv4_2/dwise 1 1 relu4_2/expand conv4_2/dwise/bn 192 1 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_2/dwise 1 1 conv4_2/dwise/bn relu4_2/dwise ," +"Convolution conv4_2/linear 1 1 relu4_2/dwise conv4_2/linear/bn 1 192 32 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_4_2 2 1 block_4_1 conv4_2/linear/bn block_4_2 ," +"Convolution conv4_3/expand 1 1 block_4_2 conv4_3/expand/bn 1 32 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_3/expand 1 1 conv4_3/expand/bn relu4_3/expand ," +"Convolution conv4_3/dwise 1 1 relu4_3/expand conv4_3/dwise/bn 192 1 192 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu4_3/dwise 1 1 conv4_3/dwise/bn relu4_3/dwise ," +"Convolution conv4_3/linear 1 1 relu4_3/dwise conv4_3/linear/bn 1 192 64 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv4_4/expand 1 1 conv4_3/linear/bn conv4_4/expand/bn 1 64 384 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_4/expand 1 1 conv4_4/expand/bn relu4_4/expand ," +"Convolution conv4_4/dwise 1 1 relu4_4/expand conv4_4/dwise/bn 384 1 384 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_4/dwise 1 1 conv4_4/dwise/bn relu4_4/dwise ," +"Convolution conv4_4/linear 1 1 relu4_4/dwise conv4_4/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_4_4 2 1 conv4_3/linear/bn conv4_4/linear/bn block_4_4 ," +"Convolution conv4_5/expand 1 1 block_4_4 conv4_5/expand/bn 1 64 384 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_5/expand 1 1 conv4_5/expand/bn relu4_5/expand ," +"Convolution 
conv4_5/dwise 1 1 relu4_5/expand conv4_5/dwise/bn 384 1 384 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_5/dwise 1 1 conv4_5/dwise/bn relu4_5/dwise ," +"Convolution conv4_5/linear 1 1 relu4_5/dwise conv4_5/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_4_5 2 1 block_4_4 conv4_5/linear/bn block_4_5 ," +"Convolution conv4_6/expand 1 1 block_4_5 conv4_6/expand/bn 1 64 384 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_6/expand 1 1 conv4_6/expand/bn relu4_6/expand ," +"Convolution conv4_6/dwise 1 1 relu4_6/expand conv4_6/dwise/bn 384 1 384 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_6/dwise 1 1 conv4_6/dwise/bn relu4_6/dwise ," +"Convolution conv4_6/linear 1 1 relu4_6/dwise conv4_6/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_4_6 2 1 block_4_5 conv4_6/linear/bn block_4_6 ," +"Convolution conv4_7/expand 1 1 block_4_6 conv4_7/expand/bn 1 64 384 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu4_7/expand 1 1 conv4_7/expand/bn relu4_7/expand ," +"Convolution conv4_7/dwise 1 1 relu4_7/expand conv4_7/dwise/bn 384 1 384 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu4_7/dwise 1 1 conv4_7/dwise/bn relu4_7/dwise ," +"Convolution conv4_7/linear 1 1 relu4_7/dwise conv4_7/linear/bn 1 384 96 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv5_1/expand 1 1 conv4_7/linear/bn conv5_1/expand/bn 1 96 576 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_1/expand 1 1 conv5_1/expand/bn relu5_1/expand ," +"Convolution conv5_1/dwise 1 1 relu5_1/expand conv5_1/dwise/bn 576 1 576 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_1/dwise 1 1 conv5_1/dwise/bn relu5_1/dwise ," +"Convolution conv5_1/linear 1 1 relu5_1/dwise conv5_1/linear/bn 1 576 96 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_5_1 2 1 conv4_7/linear/bn conv5_1/linear/bn block_5_1 ," +"Convolution conv5_2/expand 1 1 block_5_1 conv5_2/expand/bn 1 96 576 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_2/expand 1 1 conv5_2/expand/bn relu5_2/expand ," +"Convolution conv5_2/dwise 1 1 relu5_2/expand conv5_2/dwise/bn 576 1 576 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu5_2/dwise 1 1 conv5_2/dwise/bn relu5_2/dwise ," +"Convolution 
conv5_2/linear 1 1 relu5_2/dwise conv5_2/linear/bn 1 576 96 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_5_2 2 1 block_5_1 conv5_2/linear/bn block_5_2 ," +"Convolution conv5_3/expand 1 1 block_5_2 conv5_3/expand/bn 1 96 576 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu5_3/expand 1 1 conv5_3/expand/bn relu5_3/expand ," +"Convolution conv5_3/dwise 1 1 relu5_3/expand conv5_3/dwise/bn 576 1 576 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU relu5_3/dwise 1 1 conv5_3/dwise/bn relu5_3/dwise ," +"Convolution conv5_3/linear 1 1 relu5_3/dwise conv5_3/linear/bn 1 576 160 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv6_1/expand 1 1 conv5_3/linear/bn conv6_1/expand/bn 1 160 960 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu6_1/expand 1 1 conv6_1/expand/bn relu6_1/expand ," +"Convolution conv6_1/dwise 1 1 relu6_1/expand conv6_1/dwise/bn 960 1 960 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu6_1/dwise 1 1 conv6_1/dwise/bn relu6_1/dwise ," +"Convolution conv6_1/linear 1 1 relu6_1/dwise conv6_1/linear/bn 1 960 160 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_6_1 2 1 conv5_3/linear/bn conv6_1/linear/bn block_6_1 ," +"Convolution conv6_2/expand 1 1 block_6_1 conv6_2/expand/bn 1 160 960 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu6_2/expand 1 1 conv6_2/expand/bn relu6_2/expand ," +"Convolution conv6_2/dwise 1 1 relu6_2/expand conv6_2/dwise/bn 960 1 960 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu6_2/dwise 1 1 conv6_2/dwise/bn relu6_2/dwise ," +"Convolution conv6_2/linear 1 1 relu6_2/dwise conv6_2/linear/bn 1 960 160 1 1 1 1 0 0 1 -1 1 1 ," +"Add block_6_2 2 1 block_6_1 conv6_2/linear/bn block_6_2 ," +"Convolution conv6_3/expand 1 1 block_6_2 conv6_3/expand/bn 1 160 960 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu6_3/expand 1 1 conv6_3/expand/bn relu6_3/expand ," +"Convolution conv6_3/dwise 1 1 relu6_3/expand conv6_3/dwise/bn 960 1 960 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU relu6_3/dwise 1 1 conv6_3/dwise/bn relu6_3/dwise ," +"Convolution conv6_3/linear 1 1 relu6_3/dwise conv6_3/linear/bn 1 960 320 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution conv6_4 1 1 conv6_3/linear/bn conv6_4/bn 1 320 1280 
1 1 1 1 0 0 1 -1 1 1 ," +"ReLU relu6_4 1 1 conv6_4/bn relu6_4 ," +"Pooling pool6 1 1 relu6_4 pool6 1 0 0 1 1 0 0 -1 -1 -1 0 ," +"Convolution fc7 1 1 pool6 fc7 1 1280 1000 1 1 1 1 0 0 1 -1 1 1 ," +"SoftmaxCaffe prob 1 1 fc7 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_inception_v3.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/quant_inception_v3.tnnproto new file mode 100644 index 0000000..d89f983 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_inception_v3.tnnproto @@ -0,0 +1,127 @@ +"1 217 1 4206624770 ," +"input 1 3 395 395 ," +" classifier classifier_Reshape conv1_3x3_relu conv1_3x3_s2 conv2_3x3_relu conv2_3x3_s1 conv3_3x3_relu conv3_3x3_s1 conv4_3x3 conv4_3x3_reduce conv4_relu_3x3 conv4_relu_3x3_reduce inception_a1_1x1 inception_a1_1x1_relu inception_a1_3x3_1 inception_a1_3x3_1_relu inception_a1_3x3_2 inception_a1_3x3_2_relu inception_a1_3x3_reduce inception_a1_3x3_reduce_relu inception_a1_5x5 inception_a1_5x5_reduce inception_a1_5x5_reduce_relu inception_a1_5x5_relu inception_a1_output inception_a1_pool inception_a1_pool_proj inception_a1_pool_proj_relu inception_a2_1x1 inception_a2_1x1_relu inception_a2_3x3_1 inception_a2_3x3_1_relu inception_a2_3x3_2 inception_a2_3x3_2_relu inception_a2_3x3_reduce inception_a2_3x3_reduce_relu inception_a2_5x5 inception_a2_5x5_reduce inception_a2_5x5_reduce_relu inception_a2_5x5_relu inception_a2_output inception_a2_pool inception_a2_pool_proj inception_a2_pool_proj_relu inception_a3_1x1 inception_a3_1x1_relu inception_a3_3x3_1 inception_a3_3x3_1_relu inception_a3_3x3_2 inception_a3_3x3_2_relu inception_a3_3x3_reduce inception_a3_3x3_reduce_relu inception_a3_5x5 inception_a3_5x5_reduce inception_a3_5x5_reduce_relu inception_a3_5x5_relu inception_a3_output inception_a3_pool inception_a3_pool_proj inception_a3_pool_proj_relu inception_b1_1x1 inception_b1_1x1_2 inception_b1_1x1_2_relu inception_b1_1x1_relu inception_b1_1x7 inception_b1_1x7_2 inception_b1_1x7_2_relu inception_b1_1x7_3 
inception_b1_1x7_3_relu inception_b1_1x7_reduce inception_b1_1x7_reduce_relu inception_b1_1x7_relu inception_b1_7x1 inception_b1_7x1_2 inception_b1_7x1_2_relu inception_b1_7x1_3 inception_b1_7x1_3_relu inception_b1_7x1_reduce inception_b1_7x1_reduce_relu inception_b1_7x1_relu inception_b1_concat inception_b1_pool_ave inception_b2_1x1 inception_b2_1x1_2 inception_b2_1x1_2_relu inception_b2_1x1_relu inception_b2_1x7 inception_b2_1x7_2 inception_b2_1x7_2_relu inception_b2_1x7_3 inception_b2_1x7_3_relu inception_b2_1x7_reduce inception_b2_1x7_reduce_relu inception_b2_1x7_relu inception_b2_7x1 inception_b2_7x1_2 inception_b2_7x1_2_relu inception_b2_7x1_3 inception_b2_7x1_3_relu inception_b2_7x1_reduce inception_b2_7x1_reduce_relu inception_b2_7x1_relu inception_b2_concat inception_b2_pool_ave inception_b3_1x1 inception_b3_1x1_2 inception_b3_1x1_2_relu inception_b3_1x1_relu inception_b3_1x7 inception_b3_1x7_2 inception_b3_1x7_2_relu inception_b3_1x7_3 inception_b3_1x7_3_relu inception_b3_1x7_reduce inception_b3_1x7_reduce_relu inception_b3_1x7_relu inception_b3_7x1 inception_b3_7x1_2 inception_b3_7x1_2_relu inception_b3_7x1_3 inception_b3_7x1_3_relu inception_b3_7x1_reduce inception_b3_7x1_reduce_relu inception_b3_7x1_relu inception_b3_concat inception_b3_pool_ave inception_b4_1x1 inception_b4_1x1_2 inception_b4_1x1_2_relu inception_b4_1x1_relu inception_b4_1x7 inception_b4_1x7_2 inception_b4_1x7_2_relu inception_b4_1x7_3 inception_b4_1x7_3_relu inception_b4_1x7_reduce inception_b4_1x7_reduce_relu inception_b4_1x7_relu inception_b4_7x1 inception_b4_7x1_2 inception_b4_7x1_2_relu inception_b4_7x1_3 inception_b4_7x1_3_relu inception_b4_7x1_reduce inception_b4_7x1_reduce_relu inception_b4_7x1_relu inception_b4_concat inception_b4_pool_ave inception_c1_1x1 inception_c1_1x1_2 inception_c1_1x1_2_relu inception_c1_1x1_relu inception_c1_1x3 inception_c1_1x3_2 inception_c1_1x3_2_relu inception_c1_1x3_reduce inception_c1_1x3_reduce_relu inception_c1_1x3_relu inception_c1_3x1 
inception_c1_3x1_2 inception_c1_3x1_2_relu inception_c1_3x1_relu inception_c1_3x3 inception_c1_3x3_reduce inception_c1_3x3_reduce_relu inception_c1_3x3_relu inception_c1_concat inception_c1_pool inception_c2_1x1 inception_c2_1x1_2 inception_c2_1x1_2_relu inception_c2_1x1_relu inception_c2_1x3 inception_c2_1x3_2 inception_c2_1x3_2_relu inception_c2_1x3_reduce inception_c2_1x3_reduce_relu inception_c2_1x3_relu inception_c2_3x1 inception_c2_3x1_2 inception_c2_3x1_2_relu inception_c2_3x1_relu inception_c2_3x3 inception_c2_3x3_reduce inception_c2_3x3_reduce_relu inception_c2_3x3_relu inception_c2_concat inception_c2_pool input pool1_3x3_s2 pool2_3x3_s2 pool_8x8_s1 prob reduction_a_3x3 reduction_a_3x3_2 reduction_a_3x3_2_reduce reduction_a_3x3_2_reduce_relu reduction_a_3x3_2_relu reduction_a_3x3_3 reduction_a_3x3_3_relu reduction_a_3x3_relu reduction_a_concat reduction_a_pool reduction_b_1x7 reduction_b_1x7_reduce reduction_b_1x7_reduce_relu reduction_b_1x7_relu reduction_b_3x3 reduction_b_3x3_2 reduction_b_3x3_2_relu reduction_b_3x3_reduce reduction_b_3x3_reduce_relu reduction_b_3x3_relu reduction_b_7x1 reduction_b_7x1_relu reduction_b_concat reduction_b_pool ," +"prob ," +" 122 ," +"QuantizedConvolution conv1_3x3_s2 1 1 input conv1_3x3_relu 1 3 32 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv2_3x3_s1 1 1 conv1_3x3_relu conv2_3x3_relu 1 32 32 3 3 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv3_3x3_s1 1 1 conv2_3x3_relu conv3_3x3_relu 1 32 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedPooling pool1_3x3_s2 1 1 conv3_3x3_relu pool1_3x3_s2 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution conv4_3x3_reduce 1 1 pool1_3x3_s2 conv4_relu_3x3_reduce 1 64 80 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_3x3 1 1 conv4_relu_3x3_reduce conv4_relu_3x3 1 80 192 3 3 1 1 0 0 1 -1 1 1 1 ," +"QuantizedPooling pool2_3x3_s2 1 1 conv4_relu_3x3 pool2_3x3_s2 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution inception_a1_1x1 1 1 pool2_3x3_s2 inception_a1_1x1_relu 1 192 64 1 1 1 1 
0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a1_5x5_reduce 1 1 pool2_3x3_s2 inception_a1_5x5_reduce_relu 1 192 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a1_5x5 1 1 inception_a1_5x5_reduce_relu inception_a1_5x5_relu 1 48 64 5 5 1 1 2 2 1 -1 1 1 1 ," +"QuantizedConvolution inception_a1_3x3_reduce 1 1 pool2_3x3_s2 inception_a1_3x3_reduce_relu 1 192 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a1_3x3_1 1 1 inception_a1_3x3_reduce_relu inception_a1_3x3_1_relu 1 64 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_a1_3x3_2 1 1 inception_a1_3x3_1_relu inception_a1_3x3_2_relu 1 96 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedPooling inception_a1_pool 1 1 pool2_3x3_s2 inception_a1_pool 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_a1_pool_proj 1 1 inception_a1_pool inception_a1_pool_proj_relu 1 192 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_a1_output 4 1 inception_a1_1x1_relu inception_a1_5x5_relu inception_a1_3x3_2_relu inception_a1_pool_proj_relu inception_a1_output 1 ," +"QuantizedConvolution inception_a2_1x1 1 1 inception_a1_output inception_a2_1x1_relu 1 256 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a2_5x5_reduce 1 1 inception_a1_output inception_a2_5x5_reduce_relu 1 256 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a2_5x5 1 1 inception_a2_5x5_reduce_relu inception_a2_5x5_relu 1 48 64 5 5 1 1 2 2 1 -1 1 1 1 ," +"QuantizedConvolution inception_a2_3x3_reduce 1 1 inception_a1_output inception_a2_3x3_reduce_relu 1 256 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a2_3x3_1 1 1 inception_a2_3x3_reduce_relu inception_a2_3x3_1_relu 1 64 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_a2_3x3_2 1 1 inception_a2_3x3_1_relu inception_a2_3x3_2_relu 1 96 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedPooling inception_a2_pool 1 1 inception_a1_output inception_a2_pool 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_a2_pool_proj 1 1 
inception_a2_pool inception_a2_pool_proj_relu 1 256 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_a2_output 4 1 inception_a2_1x1_relu inception_a2_5x5_relu inception_a2_3x3_2_relu inception_a2_pool_proj_relu inception_a2_output 1 ," +"QuantizedConvolution inception_a3_1x1 1 1 inception_a2_output inception_a3_1x1_relu 1 288 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a3_5x5_reduce 1 1 inception_a2_output inception_a3_5x5_reduce_relu 1 288 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a3_5x5 1 1 inception_a3_5x5_reduce_relu inception_a3_5x5_relu 1 48 64 5 5 1 1 2 2 1 -1 1 1 1 ," +"QuantizedConvolution inception_a3_3x3_reduce 1 1 inception_a2_output inception_a3_3x3_reduce_relu 1 288 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_a3_3x3_1 1 1 inception_a3_3x3_reduce_relu inception_a3_3x3_1_relu 1 64 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_a3_3x3_2 1 1 inception_a3_3x3_1_relu inception_a3_3x3_2_relu 1 96 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedPooling inception_a3_pool 1 1 inception_a2_output inception_a3_pool 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_a3_pool_proj 1 1 inception_a3_pool inception_a3_pool_proj_relu 1 288 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_a3_output 4 1 inception_a3_1x1_relu inception_a3_5x5_relu inception_a3_3x3_2_relu inception_a3_pool_proj_relu inception_a3_output 1 ," +"QuantizedConvolution reduction_a_3x3 1 1 inception_a3_output reduction_a_3x3_relu 1 288 384 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_a_3x3_2_reduce 1 1 inception_a3_output reduction_a_3x3_2_reduce_relu 1 288 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_a_3x3_2 1 1 reduction_a_3x3_2_reduce_relu reduction_a_3x3_2_relu 1 64 96 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution reduction_a_3x3_3 1 1 reduction_a_3x3_2_relu reduction_a_3x3_3_relu 1 96 96 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedPooling reduction_a_pool 1 1 inception_a3_output 
reduction_a_pool 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConcat reduction_a_concat 3 1 reduction_a_3x3_relu reduction_a_3x3_3_relu reduction_a_pool reduction_a_concat 1 ," +"QuantizedConvolution inception_b1_1x1_2 1 1 reduction_a_concat inception_b1_1x1_2_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_1x7_reduce 1 1 reduction_a_concat inception_b1_1x7_reduce_relu 1 768 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_1x7 1 1 inception_b1_1x7_reduce_relu inception_b1_1x7_relu 1 128 128 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_7x1 1 1 inception_b1_1x7_relu inception_b1_7x1_relu 1 128 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_7x1_reduce 1 1 reduction_a_concat inception_b1_7x1_reduce_relu 1 768 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_7x1_2 1 1 inception_b1_7x1_reduce_relu inception_b1_7x1_2_relu 1 128 128 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_1x7_2 1 1 inception_b1_7x1_2_relu inception_b1_1x7_2_relu 1 128 128 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_7x1_3 1 1 inception_b1_1x7_2_relu inception_b1_7x1_3_relu 1 128 128 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b1_1x7_3 1 1 inception_b1_7x1_3_relu inception_b1_1x7_3_relu 1 128 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedPooling inception_b1_pool_ave 1 1 reduction_a_concat inception_b1_pool_ave 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_b1_1x1 1 1 inception_b1_pool_ave inception_b1_1x1_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_b1_concat 4 1 inception_b1_1x1_2_relu inception_b1_7x1_relu inception_b1_1x7_3_relu inception_b1_1x1_relu inception_b1_concat 1 ," +"QuantizedConvolution inception_b2_1x1_2 1 1 inception_b1_concat inception_b2_1x1_2_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_1x7_reduce 1 1 inception_b1_concat inception_b2_1x7_reduce_relu 1 768 160 1 1 1 1 0 0 1 -1 1 1 
1 ," +"QuantizedConvolution inception_b2_1x7 1 1 inception_b2_1x7_reduce_relu inception_b2_1x7_relu 1 160 160 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_7x1 1 1 inception_b2_1x7_relu inception_b2_7x1_relu 1 160 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_7x1_reduce 1 1 inception_b1_concat inception_b2_7x1_reduce_relu 1 768 160 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_7x1_2 1 1 inception_b2_7x1_reduce_relu inception_b2_7x1_2_relu 1 160 160 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_1x7_2 1 1 inception_b2_7x1_2_relu inception_b2_1x7_2_relu 1 160 160 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_7x1_3 1 1 inception_b2_1x7_2_relu inception_b2_7x1_3_relu 1 160 160 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b2_1x7_3 1 1 inception_b2_7x1_3_relu inception_b2_1x7_3_relu 1 160 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedPooling inception_b2_pool_ave 1 1 inception_b1_concat inception_b2_pool_ave 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_b2_1x1 1 1 inception_b2_pool_ave inception_b2_1x1_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_b2_concat 4 1 inception_b2_1x1_2_relu inception_b2_7x1_relu inception_b2_1x7_3_relu inception_b2_1x1_relu inception_b2_concat 1 ," +"QuantizedConvolution inception_b3_1x1_2 1 1 inception_b2_concat inception_b3_1x1_2_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_1x7_reduce 1 1 inception_b2_concat inception_b3_1x7_reduce_relu 1 768 160 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_1x7 1 1 inception_b3_1x7_reduce_relu inception_b3_1x7_relu 1 160 160 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_7x1 1 1 inception_b3_1x7_relu inception_b3_7x1_relu 1 160 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_7x1_reduce 1 1 inception_b2_concat inception_b3_7x1_reduce_relu 1 768 160 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution 
inception_b3_7x1_2 1 1 inception_b3_7x1_reduce_relu inception_b3_7x1_2_relu 1 160 160 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_1x7_2 1 1 inception_b3_7x1_2_relu inception_b3_1x7_2_relu 1 160 160 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_7x1_3 1 1 inception_b3_1x7_2_relu inception_b3_7x1_3_relu 1 160 160 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b3_1x7_3 1 1 inception_b3_7x1_3_relu inception_b3_1x7_3_relu 1 160 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedPooling inception_b3_pool_ave 1 1 inception_b2_concat inception_b3_pool_ave 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_b3_1x1 1 1 inception_b3_pool_ave inception_b3_1x1_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_b3_concat 4 1 inception_b3_1x1_2_relu inception_b3_7x1_relu inception_b3_1x7_3_relu inception_b3_1x1_relu inception_b3_concat 1 ," +"QuantizedConvolution inception_b4_1x1_2 1 1 inception_b3_concat inception_b4_1x1_2_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_1x7_reduce 1 1 inception_b3_concat inception_b4_1x7_reduce_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_1x7 1 1 inception_b4_1x7_reduce_relu inception_b4_1x7_relu 1 192 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_7x1 1 1 inception_b4_1x7_relu inception_b4_7x1_relu 1 192 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_7x1_reduce 1 1 inception_b3_concat inception_b4_7x1_reduce_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_7x1_2 1 1 inception_b4_7x1_reduce_relu inception_b4_7x1_2_relu 1 192 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_1x7_2 1 1 inception_b4_7x1_2_relu inception_b4_1x7_2_relu 1 192 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_7x1_3 1 1 inception_b4_1x7_2_relu inception_b4_7x1_3_relu 1 192 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_b4_1x7_3 1 1 
inception_b4_7x1_3_relu inception_b4_1x7_3_relu 1 192 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedPooling inception_b4_pool_ave 1 1 inception_b3_concat inception_b4_pool_ave 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_b4_1x1 1 1 inception_b4_pool_ave inception_b4_1x1_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_b4_concat 4 1 inception_b4_1x1_2_relu inception_b4_7x1_relu inception_b4_1x7_3_relu inception_b4_1x1_relu inception_b4_concat 1 ," +"QuantizedConvolution reduction_b_3x3_reduce 1 1 inception_b4_concat reduction_b_3x3_reduce_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_b_3x3 1 1 reduction_b_3x3_reduce_relu reduction_b_3x3_relu 1 192 320 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_b_1x7_reduce 1 1 inception_b4_concat reduction_b_1x7_reduce_relu 1 768 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_b_1x7 1 1 reduction_b_1x7_reduce_relu reduction_b_1x7_relu 1 192 192 1 7 1 1 0 3 1 -1 1 1 1 ," +"QuantizedConvolution reduction_b_7x1 1 1 reduction_b_1x7_relu reduction_b_7x1_relu 1 192 192 7 1 1 1 3 0 1 -1 1 1 1 ," +"QuantizedConvolution reduction_b_3x3_2 1 1 reduction_b_7x1_relu reduction_b_3x3_2_relu 1 192 192 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedPooling reduction_b_pool 1 1 inception_b4_concat reduction_b_pool 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConcat reduction_b_concat 3 1 reduction_b_3x3_relu reduction_b_3x3_2_relu reduction_b_pool reduction_b_concat 1 ," +"QuantizedConvolution inception_c1_1x1_2 1 1 reduction_b_concat inception_c1_1x1_2_relu 1 1280 320 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_1x3_reduce 1 1 reduction_b_concat inception_c1_1x3_reduce_relu 1 1280 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_1x3 1 1 inception_c1_1x3_reduce_relu inception_c1_1x3_relu 1 384 384 1 3 1 1 0 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_3x1 1 1 inception_c1_1x3_reduce_relu inception_c1_3x1_relu 1 384 384 3 1 1 1 1 0 1 -1 1 
1 1 ," +"QuantizedConvolution inception_c1_3x3_reduce 1 1 reduction_b_concat inception_c1_3x3_reduce_relu 1 1280 448 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_3x3 1 1 inception_c1_3x3_reduce_relu inception_c1_3x3_relu 1 448 384 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_1x3_2 1 1 inception_c1_3x3_relu inception_c1_1x3_2_relu 1 384 384 1 3 1 1 0 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c1_3x1_2 1 1 inception_c1_3x3_relu inception_c1_3x1_2_relu 1 384 384 3 1 1 1 1 0 1 -1 1 1 1 ," +"QuantizedPooling inception_c1_pool 1 1 reduction_b_concat inception_c1_pool 1 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_c1_1x1 1 1 inception_c1_pool inception_c1_1x1_relu 1 1280 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_c1_concat 6 1 inception_c1_1x1_2_relu inception_c1_1x3_relu inception_c1_3x1_relu inception_c1_1x3_2_relu inception_c1_3x1_2_relu inception_c1_1x1_relu inception_c1_concat 1 ," +"QuantizedConvolution inception_c2_1x1_2 1 1 inception_c1_concat inception_c2_1x1_2_relu 1 2048 320 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_1x3_reduce 1 1 inception_c1_concat inception_c2_1x3_reduce_relu 1 2048 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_1x3 1 1 inception_c2_1x3_reduce_relu inception_c2_1x3_relu 1 384 384 1 3 1 1 0 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_3x1 1 1 inception_c2_1x3_reduce_relu inception_c2_3x1_relu 1 384 384 3 1 1 1 1 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_3x3_reduce 1 1 inception_c1_concat inception_c2_3x3_reduce_relu 1 2048 448 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_3x3 1 1 inception_c2_3x3_reduce_relu inception_c2_3x3_relu 1 448 384 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_1x3_2 1 1 inception_c2_3x3_relu inception_c2_1x3_2_relu 1 384 384 1 3 1 1 0 1 1 -1 1 1 1 ," +"QuantizedConvolution inception_c2_3x1_2 1 1 inception_c2_3x3_relu inception_c2_3x1_2_relu 1 384 384 3 1 1 1 1 0 1 
-1 1 1 1 ," +"QuantizedPooling inception_c2_pool 1 1 inception_c1_concat inception_c2_pool 0 3 3 1 1 1 1 -1 -1 -1 1 ," +"QuantizedConvolution inception_c2_1x1 1 1 inception_c2_pool inception_c2_1x1_relu 1 2048 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConcat inception_c2_concat 6 1 inception_c2_1x1_2_relu inception_c2_1x3_relu inception_c2_3x1_relu inception_c2_1x3_2_relu inception_c2_3x1_2_relu inception_c2_1x1_relu inception_c2_concat 1 ," +"QuantizedPooling pool_8x8_s1 1 1 inception_c2_concat pool_8x8_s1 1 11 11 1 1 0 0 -1 -1 -1 0 ," +"Reshape classifier_Reshape 1 1 pool_8x8_s1 classifier_Reshape 0 4 4 0 2048 1 1 0 ," +"InnerProduct classifier 1 1 classifier_Reshape classifier 1000 1 0 1 ," +"SoftmaxCaffe prob 1 1 classifier prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v1.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v1.tnnproto new file mode 100644 index 0000000..a3c54b9 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v1.tnnproto @@ -0,0 +1,35 @@ +"1 58 1 4206624770 ," +"input 1 3 224 224 ," +" conv1 conv2_1/dw conv2_1/sep conv2_2/dw conv2_2/sep conv3_1/dw conv3_1/sep conv3_2/dw conv3_2/sep conv4_1/dw conv4_1/sep conv4_2/dw conv4_2/sep conv5_1/dw conv5_1/sep conv5_2/dw conv5_2/sep conv5_3/dw conv5_3/sep conv5_4/dw conv5_4/sep conv5_5/dw conv5_5/sep conv5_6/dw conv5_6/sep conv6/dw conv6/sep fc7 input pool6 prob relu1 relu2_1/dw relu2_1/sep relu2_2/dw relu2_2/sep relu3_1/dw relu3_1/sep relu3_2/dw relu3_2/sep relu4_1/dw relu4_1/sep relu4_2/dw relu4_2/sep relu5_1/dw relu5_1/sep relu5_2/dw relu5_2/sep relu5_3/dw relu5_3/sep relu5_4/dw relu5_4/sep relu5_5/dw relu5_5/sep relu5_6/dw relu5_6/sep relu6/dw relu6/sep ," +"prob ," +" 30 ," +"QuantizedConvolution conv1 1 1 input relu1 1 3 32 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_1/dw 1 1 relu1 relu2_1/dw 32 1 32 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_1/sep 1 1 relu2_1/dw relu2_1/sep 1 32 64 1 1 1 1 0 0 1 -1 1 1 1 
," +"QuantizedConvolution conv2_2/dw 1 1 relu2_1/sep relu2_2/dw 64 1 64 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_2/sep 1 1 relu2_2/dw relu2_2/sep 1 64 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv3_1/dw 1 1 relu2_2/sep relu3_1/dw 128 1 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv3_1/sep 1 1 relu3_1/dw relu3_1/sep 1 128 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv3_2/dw 1 1 relu3_1/sep relu3_2/dw 128 1 128 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv3_2/sep 1 1 relu3_2/dw relu3_2/sep 1 128 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_1/dw 1 1 relu3_2/sep relu4_1/dw 256 1 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_1/sep 1 1 relu4_1/dw relu4_1/sep 1 256 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_2/dw 1 1 relu4_1/sep relu4_2/dw 256 1 256 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_2/sep 1 1 relu4_2/dw relu4_2/sep 1 256 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_1/dw 1 1 relu4_2/sep relu5_1/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_1/sep 1 1 relu5_1/dw relu5_1/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_2/dw 1 1 relu5_1/sep relu5_2/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_2/sep 1 1 relu5_2/dw relu5_2/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_3/dw 1 1 relu5_2/sep relu5_3/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_3/sep 1 1 relu5_3/dw relu5_3/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_4/dw 1 1 relu5_3/sep relu5_4/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_4/sep 1 1 relu5_4/dw relu5_4/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_5/dw 1 1 relu5_4/sep relu5_5/dw 512 1 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_5/sep 1 1 relu5_5/dw relu5_5/sep 1 512 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_6/dw 1 1 relu5_5/sep 
relu5_6/dw 512 1 512 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_6/sep 1 1 relu5_6/dw relu5_6/sep 1 512 1024 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv6/dw 1 1 relu5_6/sep relu6/dw 1024 1 1024 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv6/sep 1 1 relu6/dw relu6/sep 1 1024 1024 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedPooling pool6 1 1 relu6/sep pool6 1 7 7 1 1 0 0 -1 -1 -1 0 ," +"QuantizedConvolution fc7 1 1 pool6 fc7 1 1024 1000 1 1 1 1 0 0 1 -1 1 1 0 ," +"SoftmaxCaffe prob 1 1 fc7 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v2.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v2.tnnproto new file mode 100644 index 0000000..adf32fb --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_mobilenet_v2.tnnproto @@ -0,0 +1,71 @@ +"1 103 1 4206624770 ," +"input 1 3 224 224 ," +" block_3_1 block_4_1 block_4_2 block_4_4 block_4_5 block_4_6 block_5_1 block_5_2 block_6_1 block_6_2 conv1/bn conv2_1/dwise/bn conv2_1/expand/bn conv2_1/linear/bn conv2_2/dwise/bn conv2_2/expand/bn conv2_2/linear/bn conv3_1/dwise/bn conv3_1/expand/bn conv3_1/linear/bn conv3_2/dwise/bn conv3_2/expand/bn conv3_2/linear/bn conv4_1/dwise/bn conv4_1/expand/bn conv4_1/linear/bn conv4_2/dwise/bn conv4_2/expand/bn conv4_2/linear/bn conv4_3/dwise/bn conv4_3/expand/bn conv4_3/linear/bn conv4_4/dwise/bn conv4_4/expand/bn conv4_4/linear/bn conv4_5/dwise/bn conv4_5/expand/bn conv4_5/linear/bn conv4_6/dwise/bn conv4_6/expand/bn conv4_6/linear/bn conv4_7/dwise/bn conv4_7/expand/bn conv4_7/linear/bn conv5_1/dwise/bn conv5_1/expand/bn conv5_1/linear/bn conv5_2/dwise/bn conv5_2/expand/bn conv5_2/linear/bn conv5_3/dwise/bn conv5_3/expand/bn conv5_3/linear/bn conv6_1/dwise/bn conv6_1/expand/bn conv6_1/linear/bn conv6_2/dwise/bn conv6_2/expand/bn conv6_2/linear/bn conv6_3/dwise/bn conv6_3/expand/bn conv6_3/linear/bn conv6_4/bn fc7 input pool6 prob relu1 relu2_1/dwise relu2_1/expand relu2_2/dwise relu2_2/expand relu3_1/dwise 
relu3_1/expand relu3_2/dwise relu3_2/expand relu4_1/dwise relu4_1/expand relu4_2/dwise relu4_2/expand relu4_3/dwise relu4_3/expand relu4_4/dwise relu4_4/expand relu4_5/dwise relu4_5/expand relu4_6/dwise relu4_6/expand relu4_7/dwise relu4_7/expand relu5_1/dwise relu5_1/expand relu5_2/dwise relu5_2/expand relu5_3/dwise relu5_3/expand relu6_1/dwise relu6_1/expand relu6_2/dwise relu6_2/expand relu6_3/dwise relu6_3/expand relu6_4 ," +"prob ," +" 66 ," +"QuantizedConvolution conv1 1 1 input relu1 1 3 32 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_1/expand 1 1 relu1 relu2_1/expand 1 32 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv2_1/dwise 1 1 relu2_1/expand relu2_1/dwise 32 1 32 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_1/linear 1 1 relu2_1/dwise conv2_1/linear/bn 1 32 16 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv2_2/expand 1 1 conv2_1/linear/bn relu2_2/expand 1 16 96 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv2_2/dwise 1 1 relu2_2/expand relu2_2/dwise 96 1 96 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv2_2/linear 1 1 relu2_2/dwise conv2_2/linear/bn 1 96 24 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv3_1/expand 1 1 conv2_2/linear/bn relu3_1/expand 1 24 144 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv3_1/dwise 1 1 relu3_1/expand relu3_1/dwise 144 1 144 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv3_1/linear 1 1 relu3_1/dwise conv3_1/linear/bn 1 144 24 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_3_1 2 1 conv2_2/linear/bn conv3_1/linear/bn block_3_1 1 ," +"QuantizedConvolution conv3_2/expand 1 1 block_3_1 relu3_2/expand 1 24 144 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv3_2/dwise 1 1 relu3_2/expand relu3_2/dwise 144 1 144 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv3_2/linear 1 1 relu3_2/dwise conv3_2/linear/bn 1 144 32 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv4_1/expand 1 1 conv3_2/linear/bn relu4_1/expand 1 32 192 1 1 1 1 0 0 1 -1 1 1 1 ," 
+"QuantizedConvolution conv4_1/dwise 1 1 relu4_1/expand relu4_1/dwise 192 1 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_1/linear 1 1 relu4_1/dwise conv4_1/linear/bn 1 192 32 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_4_1 2 1 conv3_2/linear/bn conv4_1/linear/bn block_4_1 1 ," +"QuantizedConvolution conv4_2/expand 1 1 block_4_1 relu4_2/expand 1 32 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_2/dwise 1 1 relu4_2/expand relu4_2/dwise 192 1 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_2/linear 1 1 relu4_2/dwise conv4_2/linear/bn 1 192 32 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_4_2 2 1 block_4_1 conv4_2/linear/bn block_4_2 1 ," +"QuantizedConvolution conv4_3/expand 1 1 block_4_2 relu4_3/expand 1 32 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_3/dwise 1 1 relu4_3/expand relu4_3/dwise 192 1 192 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_3/linear 1 1 relu4_3/dwise conv4_3/linear/bn 1 192 64 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv4_4/expand 1 1 conv4_3/linear/bn relu4_4/expand 1 64 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_4/dwise 1 1 relu4_4/expand relu4_4/dwise 384 1 384 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_4/linear 1 1 relu4_4/dwise conv4_4/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_4_4 2 1 conv4_3/linear/bn conv4_4/linear/bn block_4_4 1 ," +"QuantizedConvolution conv4_5/expand 1 1 block_4_4 relu4_5/expand 1 64 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_5/dwise 1 1 relu4_5/expand relu4_5/dwise 384 1 384 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_5/linear 1 1 relu4_5/dwise conv4_5/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_4_5 2 1 block_4_4 conv4_5/linear/bn block_4_5 1 ," +"QuantizedConvolution conv4_6/expand 1 1 block_4_5 relu4_6/expand 1 64 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_6/dwise 1 1 relu4_6/expand relu4_6/dwise 384 1 384 3 3 1 1 1 1 1 -1 1 1 1 
," +"QuantizedConvolution conv4_6/linear 1 1 relu4_6/dwise conv4_6/linear/bn 1 384 64 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_4_6 2 1 block_4_5 conv4_6/linear/bn block_4_6 1 ," +"QuantizedConvolution conv4_7/expand 1 1 block_4_6 relu4_7/expand 1 64 384 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv4_7/dwise 1 1 relu4_7/expand relu4_7/dwise 384 1 384 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv4_7/linear 1 1 relu4_7/dwise conv4_7/linear/bn 1 384 96 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv5_1/expand 1 1 conv4_7/linear/bn relu5_1/expand 1 96 576 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_1/dwise 1 1 relu5_1/expand relu5_1/dwise 576 1 576 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_1/linear 1 1 relu5_1/dwise conv5_1/linear/bn 1 576 96 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_5_1 2 1 conv4_7/linear/bn conv5_1/linear/bn block_5_1 1 ," +"QuantizedConvolution conv5_2/expand 1 1 block_5_1 relu5_2/expand 1 96 576 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_2/dwise 1 1 relu5_2/expand relu5_2/dwise 576 1 576 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_2/linear 1 1 relu5_2/dwise conv5_2/linear/bn 1 576 96 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_5_2 2 1 block_5_1 conv5_2/linear/bn block_5_2 1 ," +"QuantizedConvolution conv5_3/expand 1 1 block_5_2 relu5_3/expand 1 96 576 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv5_3/dwise 1 1 relu5_3/expand relu5_3/dwise 576 1 576 3 3 2 2 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv5_3/linear 1 1 relu5_3/dwise conv5_3/linear/bn 1 576 160 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv6_1/expand 1 1 conv5_3/linear/bn relu6_1/expand 1 160 960 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv6_1/dwise 1 1 relu6_1/expand relu6_1/dwise 960 1 960 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv6_1/linear 1 1 relu6_1/dwise conv6_1/linear/bn 1 960 160 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_6_1 2 1 conv5_3/linear/bn 
conv6_1/linear/bn block_6_1 1 ," +"QuantizedConvolution conv6_2/expand 1 1 block_6_1 relu6_2/expand 1 160 960 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv6_2/dwise 1 1 relu6_2/expand relu6_2/dwise 960 1 960 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv6_2/linear 1 1 relu6_2/dwise conv6_2/linear/bn 1 960 160 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd block_6_2 2 1 block_6_1 conv6_2/linear/bn block_6_2 1 ," +"QuantizedConvolution conv6_3/expand 1 1 block_6_2 relu6_3/expand 1 160 960 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution conv6_3/dwise 1 1 relu6_3/expand relu6_3/dwise 960 1 960 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution conv6_3/linear 1 1 relu6_3/dwise conv6_3/linear/bn 1 960 320 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution conv6_4 1 1 conv6_3/linear/bn relu6_4 1 320 1280 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedPooling pool6 1 1 relu6_4 pool6 1 0 0 1 1 0 0 -1 -1 -1 0 ," +"QuantizedConvolution fc7 1 1 pool6 fc7 1 1280 1000 1 1 1 1 0 0 1 -1 1 1 0 ," +"SoftmaxCaffe prob 1 1 fc7 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_resnet50.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/quant_resnet50.tnnproto new file mode 100644 index 0000000..5b7e566 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_resnet50.tnnproto @@ -0,0 +1,95 @@ +"1 124 1 4206624770 ," +"input 1 3 224 224 ," +" conv1_Y conv1_relu_Y fc1000_Gemm_Y fc1000_Reshape_Y input pool1_Y pool5_Y prob_Y res2a_Y res2a_branch1_Y res2a_branch2a_Y res2a_branch2a_relu_Y res2a_branch2b_Y res2a_branch2b_relu_Y res2a_branch2c_Y res2a_relu_Y res2b_Y res2b_branch2a_Y res2b_branch2a_relu_Y res2b_branch2b_Y res2b_branch2b_relu_Y res2b_branch2c_Y res2b_relu_Y res2c_Y res2c_branch2a_Y res2c_branch2a_relu_Y res2c_branch2b_Y res2c_branch2b_relu_Y res2c_branch2c_Y res2c_relu_Y res3a_Y res3a_branch1_Y res3a_branch2a_Y res3a_branch2a_relu_Y res3a_branch2b_Y res3a_branch2b_relu_Y res3a_branch2c_Y res3a_relu_Y res3b_Y res3b_branch2a_Y res3b_branch2a_relu_Y 
res3b_branch2b_Y res3b_branch2b_relu_Y res3b_branch2c_Y res3b_relu_Y res3c_Y res3c_branch2a_Y res3c_branch2a_relu_Y res3c_branch2b_Y res3c_branch2b_relu_Y res3c_branch2c_Y res3c_relu_Y res3d_Y res3d_branch2a_Y res3d_branch2a_relu_Y res3d_branch2b_Y res3d_branch2b_relu_Y res3d_branch2c_Y res3d_relu_Y res4a_Y res4a_branch1_Y res4a_branch2a_Y res4a_branch2a_relu_Y res4a_branch2b_Y res4a_branch2b_relu_Y res4a_branch2c_Y res4a_relu_Y res4b_Y res4b_branch2a_Y res4b_branch2a_relu_Y res4b_branch2b_Y res4b_branch2b_relu_Y res4b_branch2c_Y res4b_relu_Y res4c_Y res4c_branch2a_Y res4c_branch2a_relu_Y res4c_branch2b_Y res4c_branch2b_relu_Y res4c_branch2c_Y res4c_relu_Y res4d_Y res4d_branch2a_Y res4d_branch2a_relu_Y res4d_branch2b_Y res4d_branch2b_relu_Y res4d_branch2c_Y res4d_relu_Y res4e_Y res4e_branch2a_Y res4e_branch2a_relu_Y res4e_branch2b_Y res4e_branch2b_relu_Y res4e_branch2c_Y res4e_relu_Y res4f_Y res4f_branch2a_Y res4f_branch2a_relu_Y res4f_branch2b_Y res4f_branch2b_relu_Y res4f_branch2c_Y res4f_relu_Y res5a_Y res5a_branch1_Y res5a_branch2a_Y res5a_branch2a_relu_Y res5a_branch2b_Y res5a_branch2b_relu_Y res5a_branch2c_Y res5a_relu_Y res5b_Y res5b_branch2a_Y res5b_branch2a_relu_Y res5b_branch2b_Y res5b_branch2b_relu_Y res5b_branch2c_Y res5b_relu_Y res5c_Y res5c_branch2a_Y res5c_branch2a_relu_Y res5c_branch2b_Y res5c_branch2b_relu_Y res5c_branch2c_Y res5c_relu_Y ," +"prob_Y ," +" 90 ," +"QuantizedConvolution conv1 1 1 input conv1_relu_Y 1 3 64 7 7 2 2 3 3 1 -1 1 1 1 ," +"QuantizedPooling pool1 1 1 conv1_relu_Y pool1_Y 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution res2a_branch1 1 1 pool1_Y res2a_branch1_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedConvolution res2a_branch2a 1 1 pool1_Y res2a_branch2a_relu_Y 1 64 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res2a_branch2b 1 1 res2a_branch2a_relu_Y res2a_branch2b_relu_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res2a_branch2c 1 1 res2a_branch2b_relu_Y res2a_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 
1 0 ," +"QuantizedAdd res2a 2 1 res2a_branch1_Y res2a_branch2c_Y res2a_Y ," +"QuantizedReLU res2a_relu 1 1 res2a_Y res2a_relu_Y ," +"QuantizedConvolution res2b_branch2a 1 1 res2a_relu_Y res2b_branch2a_relu_Y 1 256 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res2b_branch2b 1 1 res2b_branch2a_relu_Y res2b_branch2b_relu_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res2b_branch2c 1 1 res2b_branch2b_relu_Y res2b_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res2b 2 1 res2a_relu_Y res2b_branch2c_Y res2b_Y ," +"QuantizedReLU res2b_relu 1 1 res2b_Y res2b_relu_Y ," +"QuantizedConvolution res2c_branch2a 1 1 res2b_relu_Y res2c_branch2a_relu_Y 1 256 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res2c_branch2b 1 1 res2c_branch2a_relu_Y res2c_branch2b_relu_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res2c_branch2c 1 1 res2c_branch2b_relu_Y res2c_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res2c 2 1 res2b_relu_Y res2c_branch2c_Y res2c_Y ," +"QuantizedReLU res2c_relu 1 1 res2c_Y res2c_relu_Y ," +"QuantizedConvolution res3a_branch1 1 1 res2c_relu_Y res3a_branch1_Y 1 256 512 1 1 2 2 0 0 1 -1 1 1 0 ," +"QuantizedConvolution res3a_branch2a 1 1 res2c_relu_Y res3a_branch2a_relu_Y 1 256 128 1 1 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res3a_branch2b 1 1 res3a_branch2a_relu_Y res3a_branch2b_relu_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res3a_branch2c 1 1 res3a_branch2b_relu_Y res3a_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res3a 2 1 res3a_branch1_Y res3a_branch2c_Y res3a_Y ," +"QuantizedReLU res3a_relu 1 1 res3a_Y res3a_relu_Y ," +"QuantizedConvolution res3b_branch2a 1 1 res3a_relu_Y res3b_branch2a_relu_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res3b_branch2b 1 1 res3b_branch2a_relu_Y res3b_branch2b_relu_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res3b_branch2c 1 1 res3b_branch2b_relu_Y res3b_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 0 
," +"QuantizedAdd res3b 2 1 res3a_relu_Y res3b_branch2c_Y res3b_Y ," +"QuantizedReLU res3b_relu 1 1 res3b_Y res3b_relu_Y ," +"QuantizedConvolution res3c_branch2a 1 1 res3b_relu_Y res3c_branch2a_relu_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res3c_branch2b 1 1 res3c_branch2a_relu_Y res3c_branch2b_relu_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res3c_branch2c 1 1 res3c_branch2b_relu_Y res3c_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res3c 2 1 res3b_relu_Y res3c_branch2c_Y res3c_Y ," +"QuantizedReLU res3c_relu 1 1 res3c_Y res3c_relu_Y ," +"QuantizedConvolution res3d_branch2a 1 1 res3c_relu_Y res3d_branch2a_relu_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res3d_branch2b 1 1 res3d_branch2a_relu_Y res3d_branch2b_relu_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res3d_branch2c 1 1 res3d_branch2b_relu_Y res3d_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res3d 2 1 res3c_relu_Y res3d_branch2c_Y res3d_Y ," +"QuantizedReLU res3d_relu 1 1 res3d_Y res3d_relu_Y ," +"QuantizedConvolution res4a_branch1 1 1 res3d_relu_Y res4a_branch1_Y 1 512 1024 1 1 2 2 0 0 1 -1 1 1 0 ," +"QuantizedConvolution res4a_branch2a 1 1 res3d_relu_Y res4a_branch2a_relu_Y 1 512 256 1 1 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4a_branch2b 1 1 res4a_branch2a_relu_Y res4a_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4a_branch2c 1 1 res4a_branch2b_relu_Y res4a_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res4a 2 1 res4a_branch1_Y res4a_branch2c_Y res4a_Y ," +"QuantizedReLU res4a_relu 1 1 res4a_Y res4a_relu_Y ," +"QuantizedConvolution res4b_branch2a 1 1 res4a_relu_Y res4b_branch2a_relu_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4b_branch2b 1 1 res4b_branch2a_relu_Y res4b_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4b_branch2c 1 1 res4b_branch2b_relu_Y res4b_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 
1 1 0 ," +"QuantizedAdd res4b 2 1 res4a_relu_Y res4b_branch2c_Y res4b_Y ," +"QuantizedReLU res4b_relu 1 1 res4b_Y res4b_relu_Y ," +"QuantizedConvolution res4c_branch2a 1 1 res4b_relu_Y res4c_branch2a_relu_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4c_branch2b 1 1 res4c_branch2a_relu_Y res4c_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4c_branch2c 1 1 res4c_branch2b_relu_Y res4c_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res4c 2 1 res4b_relu_Y res4c_branch2c_Y res4c_Y ," +"QuantizedReLU res4c_relu 1 1 res4c_Y res4c_relu_Y ," +"QuantizedConvolution res4d_branch2a 1 1 res4c_relu_Y res4d_branch2a_relu_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4d_branch2b 1 1 res4d_branch2a_relu_Y res4d_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4d_branch2c 1 1 res4d_branch2b_relu_Y res4d_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res4d 2 1 res4c_relu_Y res4d_branch2c_Y res4d_Y ," +"QuantizedReLU res4d_relu 1 1 res4d_Y res4d_relu_Y ," +"QuantizedConvolution res4e_branch2a 1 1 res4d_relu_Y res4e_branch2a_relu_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4e_branch2b 1 1 res4e_branch2a_relu_Y res4e_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4e_branch2c 1 1 res4e_branch2b_relu_Y res4e_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res4e 2 1 res4d_relu_Y res4e_branch2c_Y res4e_Y ," +"QuantizedReLU res4e_relu 1 1 res4e_Y res4e_relu_Y ," +"QuantizedConvolution res4f_branch2a 1 1 res4e_relu_Y res4f_branch2a_relu_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res4f_branch2b 1 1 res4f_branch2a_relu_Y res4f_branch2b_relu_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res4f_branch2c 1 1 res4f_branch2b_relu_Y res4f_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res4f 2 1 res4e_relu_Y res4f_branch2c_Y res4f_Y ," +"QuantizedReLU 
res4f_relu 1 1 res4f_Y res4f_relu_Y ," +"QuantizedConvolution res5a_branch1 1 1 res4f_relu_Y res5a_branch1_Y 1 1024 2048 1 1 2 2 0 0 1 -1 1 1 0 ," +"QuantizedConvolution res5a_branch2a 1 1 res4f_relu_Y res5a_branch2a_relu_Y 1 1024 512 1 1 2 2 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res5a_branch2b 1 1 res5a_branch2a_relu_Y res5a_branch2b_relu_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res5a_branch2c 1 1 res5a_branch2b_relu_Y res5a_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res5a 2 1 res5a_branch1_Y res5a_branch2c_Y res5a_Y ," +"QuantizedReLU res5a_relu 1 1 res5a_Y res5a_relu_Y ," +"QuantizedConvolution res5b_branch2a 1 1 res5a_relu_Y res5b_branch2a_relu_Y 1 2048 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res5b_branch2b 1 1 res5b_branch2a_relu_Y res5b_branch2b_relu_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res5b_branch2c 1 1 res5b_branch2b_relu_Y res5b_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res5b 2 1 res5a_relu_Y res5b_branch2c_Y res5b_Y ," +"QuantizedReLU res5b_relu 1 1 res5b_Y res5b_relu_Y ," +"QuantizedConvolution res5c_branch2a 1 1 res5b_relu_Y res5c_branch2a_relu_Y 1 2048 512 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution res5c_branch2b 1 1 res5c_branch2a_relu_Y res5c_branch2b_relu_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConvolution res5c_branch2c 1 1 res5c_branch2b_relu_Y res5c_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 0 ," +"QuantizedAdd res5c 2 1 res5b_relu_Y res5c_branch2c_Y res5c_Y ," +"QuantizedReLU res5c_relu 1 1 res5c_Y res5c_relu_Y ," +"QuantizedPooling pool5 1 1 res5c_relu_Y pool5_Y 1 7 7 1 1 0 0 -1 -1 -1 0 ," +"Reshape fc1000_Reshape 1 1 pool5_Y fc1000_Reshape_Y 0 4 4 0 2048 1 1 0 ," +"InnerProduct fc1000_Gemm 1 1 fc1000_Reshape_Y fc1000_Gemm_Y 1000 1 0 1 ," +"SoftmaxCaffe prob 1 1 fc1000_Gemm_Y prob_Y 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.0.tnnproto 
b/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.0.tnnproto new file mode 100644 index 0000000..7fba617 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.0.tnnproto @@ -0,0 +1,44 @@ +"1 66 1 4206624770 ," +"input 1 3 227 227 ," +" conv1 conv10 fire2/concat fire2/expand1x1 fire2/expand3x3 fire2/relu_expand1x1 fire2/relu_expand3x3 fire2/relu_squeeze1x1 fire2/squeeze1x1 fire3/concat fire3/expand1x1 fire3/expand3x3 fire3/relu_expand1x1 fire3/relu_expand3x3 fire3/relu_squeeze1x1 fire3/squeeze1x1 fire4/concat fire4/expand1x1 fire4/expand3x3 fire4/relu_expand1x1 fire4/relu_expand3x3 fire4/relu_squeeze1x1 fire4/squeeze1x1 fire5/concat fire5/expand1x1 fire5/expand3x3 fire5/relu_expand1x1 fire5/relu_expand3x3 fire5/relu_squeeze1x1 fire5/squeeze1x1 fire6/concat fire6/expand1x1 fire6/expand3x3 fire6/relu_expand1x1 fire6/relu_expand3x3 fire6/relu_squeeze1x1 fire6/squeeze1x1 fire7/concat fire7/expand1x1 fire7/expand3x3 fire7/relu_expand1x1 fire7/relu_expand3x3 fire7/relu_squeeze1x1 fire7/squeeze1x1 fire8/concat fire8/expand1x1 fire8/expand3x3 fire8/relu_expand1x1 fire8/relu_expand3x3 fire8/relu_squeeze1x1 fire8/squeeze1x1 fire9/concat fire9/expand1x1 fire9/expand3x3 fire9/relu_expand1x1 fire9/relu_expand3x3 fire9/relu_squeeze1x1 fire9/squeeze1x1 input pool1 pool10 pool4 pool8 prob relu_conv1 relu_conv10 ," +"prob ," +" 39 ," +"QuantizedConvolution conv1 1 1 input relu_conv1 1 3 96 7 7 2 2 0 0 1 -1 1 1 1 ," +"QuantizedPooling pool1 1 1 relu_conv1 pool1 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution fire2/squeeze1x1 1 1 pool1 fire2/relu_squeeze1x1 1 96 16 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire2/expand1x1 1 1 fire2/relu_squeeze1x1 fire2/relu_expand1x1 1 16 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire2/expand3x3 1 1 fire2/relu_squeeze1x1 fire2/relu_expand3x3 1 16 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire2/concat 2 1 fire2/relu_expand1x1 fire2/relu_expand3x3 fire2/concat 1 ," +"QuantizedConvolution 
fire3/squeeze1x1 1 1 fire2/concat fire3/relu_squeeze1x1 1 128 16 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire3/expand1x1 1 1 fire3/relu_squeeze1x1 fire3/relu_expand1x1 1 16 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire3/expand3x3 1 1 fire3/relu_squeeze1x1 fire3/relu_expand3x3 1 16 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire3/concat 2 1 fire3/relu_expand1x1 fire3/relu_expand3x3 fire3/concat 1 ," +"QuantizedConvolution fire4/squeeze1x1 1 1 fire3/concat fire4/relu_squeeze1x1 1 128 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire4/expand1x1 1 1 fire4/relu_squeeze1x1 fire4/relu_expand1x1 1 32 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire4/expand3x3 1 1 fire4/relu_squeeze1x1 fire4/relu_expand3x3 1 32 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire4/concat 2 1 fire4/relu_expand1x1 fire4/relu_expand3x3 fire4/concat 1 ," +"QuantizedPooling pool4 1 1 fire4/concat pool4 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution fire5/squeeze1x1 1 1 pool4 fire5/relu_squeeze1x1 1 256 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire5/expand1x1 1 1 fire5/relu_squeeze1x1 fire5/relu_expand1x1 1 32 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire5/expand3x3 1 1 fire5/relu_squeeze1x1 fire5/relu_expand3x3 1 32 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire5/concat 2 1 fire5/relu_expand1x1 fire5/relu_expand3x3 fire5/concat 1 ," +"QuantizedConvolution fire6/squeeze1x1 1 1 fire5/concat fire6/relu_squeeze1x1 1 256 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire6/expand1x1 1 1 fire6/relu_squeeze1x1 fire6/relu_expand1x1 1 48 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire6/expand3x3 1 1 fire6/relu_squeeze1x1 fire6/relu_expand3x3 1 48 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire6/concat 2 1 fire6/relu_expand1x1 fire6/relu_expand3x3 fire6/concat 1 ," +"QuantizedConvolution fire7/squeeze1x1 1 1 fire6/concat fire7/relu_squeeze1x1 1 384 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution 
fire7/expand1x1 1 1 fire7/relu_squeeze1x1 fire7/relu_expand1x1 1 48 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire7/expand3x3 1 1 fire7/relu_squeeze1x1 fire7/relu_expand3x3 1 48 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire7/concat 2 1 fire7/relu_expand1x1 fire7/relu_expand3x3 fire7/concat 1 ," +"QuantizedConvolution fire8/squeeze1x1 1 1 fire7/concat fire8/relu_squeeze1x1 1 384 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire8/expand1x1 1 1 fire8/relu_squeeze1x1 fire8/relu_expand1x1 1 64 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire8/expand3x3 1 1 fire8/relu_squeeze1x1 fire8/relu_expand3x3 1 64 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire8/concat 2 1 fire8/relu_expand1x1 fire8/relu_expand3x3 fire8/concat 1 ," +"QuantizedPooling pool8 1 1 fire8/concat pool8 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"QuantizedConvolution fire9/squeeze1x1 1 1 pool8 fire9/relu_squeeze1x1 1 512 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire9/expand1x1 1 1 fire9/relu_squeeze1x1 fire9/relu_expand1x1 1 64 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution fire9/expand3x3 1 1 fire9/relu_squeeze1x1 fire9/relu_expand3x3 1 64 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat fire9/concat 2 1 fire9/relu_expand1x1 fire9/relu_expand3x3 fire9/concat 1 ," +"QuantizedConvolution conv10 1 1 fire9/concat relu_conv10 1 512 1000 1 1 1 1 1 1 1 -1 1 1 1 ," +"QuantizedPooling pool10 1 1 relu_conv10 pool10 1 15 15 1 1 0 0 -1 -1 -1 0 ," +"SoftmaxCaffe prob 1 1 pool10 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.1.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.1.tnnproto new file mode 100644 index 0000000..f768103 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/quant_squeezenet_v1.1.tnnproto @@ -0,0 +1,44 @@ +"1 66 1 4206624770 ," +"data 1 3 224 224 ," +" data squeezenet0_concat0 squeezenet0_concat1 squeezenet0_concat2 squeezenet0_concat3 squeezenet0_concat4 squeezenet0_concat5 squeezenet0_concat6 
squeezenet0_concat7 squeezenet0_conv0_fwd squeezenet0_conv10_fwd squeezenet0_conv11_fwd squeezenet0_conv12_fwd squeezenet0_conv13_fwd squeezenet0_conv14_fwd squeezenet0_conv15_fwd squeezenet0_conv16_fwd squeezenet0_conv17_fwd squeezenet0_conv18_fwd squeezenet0_conv19_fwd squeezenet0_conv1_fwd squeezenet0_conv20_fwd squeezenet0_conv21_fwd squeezenet0_conv22_fwd squeezenet0_conv23_fwd squeezenet0_conv24_fwd squeezenet0_conv25_fwd squeezenet0_conv2_fwd squeezenet0_conv3_fwd squeezenet0_conv4_fwd squeezenet0_conv5_fwd squeezenet0_conv6_fwd squeezenet0_conv7_fwd squeezenet0_conv8_fwd squeezenet0_conv9_fwd squeezenet0_flatten0_reshape0 squeezenet0_pool0_fwd squeezenet0_pool1_fwd squeezenet0_pool2_fwd squeezenet0_pool3_fwd squeezenet0_relu0_fwd squeezenet0_relu10_fwd squeezenet0_relu11_fwd squeezenet0_relu12_fwd squeezenet0_relu13_fwd squeezenet0_relu14_fwd squeezenet0_relu15_fwd squeezenet0_relu16_fwd squeezenet0_relu17_fwd squeezenet0_relu18_fwd squeezenet0_relu19_fwd squeezenet0_relu1_fwd squeezenet0_relu20_fwd squeezenet0_relu21_fwd squeezenet0_relu22_fwd squeezenet0_relu23_fwd squeezenet0_relu24_fwd squeezenet0_relu25_fwd squeezenet0_relu2_fwd squeezenet0_relu3_fwd squeezenet0_relu4_fwd squeezenet0_relu5_fwd squeezenet0_relu6_fwd squeezenet0_relu7_fwd squeezenet0_relu8_fwd squeezenet0_relu9_fwd ," +"squeezenet0_flatten0_reshape0 ," +" 39 ," +"QuantizedConvolution squeezenet0_conv0_fwd 1 1 data squeezenet0_relu0_fwd 1 3 64 3 3 2 2 0 0 1 -1 1 1 1 ," +"QuantizedPooling squeezenet0_pool0_fwd 1 1 squeezenet0_relu0_fwd squeezenet0_pool0_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"QuantizedConvolution squeezenet0_conv1_fwd 1 1 squeezenet0_pool0_fwd squeezenet0_relu1_fwd 1 64 16 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv2_fwd 1 1 squeezenet0_relu1_fwd squeezenet0_relu2_fwd 1 16 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv3_fwd 1 1 squeezenet0_relu1_fwd squeezenet0_relu3_fwd 1 16 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat 
squeezenet0_concat0 2 1 squeezenet0_relu2_fwd squeezenet0_relu3_fwd squeezenet0_concat0 1 ," +"QuantizedConvolution squeezenet0_conv4_fwd 1 1 squeezenet0_concat0 squeezenet0_relu4_fwd 1 128 16 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv5_fwd 1 1 squeezenet0_relu4_fwd squeezenet0_relu5_fwd 1 16 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv6_fwd 1 1 squeezenet0_relu4_fwd squeezenet0_relu6_fwd 1 16 64 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat1 2 1 squeezenet0_relu5_fwd squeezenet0_relu6_fwd squeezenet0_concat1 1 ," +"QuantizedPooling squeezenet0_pool1_fwd 1 1 squeezenet0_concat1 squeezenet0_pool1_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"QuantizedConvolution squeezenet0_conv7_fwd 1 1 squeezenet0_pool1_fwd squeezenet0_relu7_fwd 1 128 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv8_fwd 1 1 squeezenet0_relu7_fwd squeezenet0_relu8_fwd 1 32 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv9_fwd 1 1 squeezenet0_relu7_fwd squeezenet0_relu9_fwd 1 32 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat2 2 1 squeezenet0_relu8_fwd squeezenet0_relu9_fwd squeezenet0_concat2 1 ," +"QuantizedConvolution squeezenet0_conv10_fwd 1 1 squeezenet0_concat2 squeezenet0_relu10_fwd 1 256 32 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv11_fwd 1 1 squeezenet0_relu10_fwd squeezenet0_relu11_fwd 1 32 128 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv12_fwd 1 1 squeezenet0_relu10_fwd squeezenet0_relu12_fwd 1 32 128 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat3 2 1 squeezenet0_relu11_fwd squeezenet0_relu12_fwd squeezenet0_concat3 1 ," +"QuantizedPooling squeezenet0_pool2_fwd 1 1 squeezenet0_concat3 squeezenet0_pool2_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"QuantizedConvolution squeezenet0_conv13_fwd 1 1 squeezenet0_pool2_fwd squeezenet0_relu13_fwd 1 256 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv14_fwd 1 1 
squeezenet0_relu13_fwd squeezenet0_relu14_fwd 1 48 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv15_fwd 1 1 squeezenet0_relu13_fwd squeezenet0_relu15_fwd 1 48 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat4 2 1 squeezenet0_relu14_fwd squeezenet0_relu15_fwd squeezenet0_concat4 1 ," +"QuantizedConvolution squeezenet0_conv16_fwd 1 1 squeezenet0_concat4 squeezenet0_relu16_fwd 1 384 48 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv17_fwd 1 1 squeezenet0_relu16_fwd squeezenet0_relu17_fwd 1 48 192 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv18_fwd 1 1 squeezenet0_relu16_fwd squeezenet0_relu18_fwd 1 48 192 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat5 2 1 squeezenet0_relu17_fwd squeezenet0_relu18_fwd squeezenet0_concat5 1 ," +"QuantizedConvolution squeezenet0_conv19_fwd 1 1 squeezenet0_concat5 squeezenet0_relu19_fwd 1 384 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv20_fwd 1 1 squeezenet0_relu19_fwd squeezenet0_relu20_fwd 1 64 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv21_fwd 1 1 squeezenet0_relu19_fwd squeezenet0_relu21_fwd 1 64 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat6 2 1 squeezenet0_relu20_fwd squeezenet0_relu21_fwd squeezenet0_concat6 1 ," +"QuantizedConvolution squeezenet0_conv22_fwd 1 1 squeezenet0_concat6 squeezenet0_relu22_fwd 1 512 64 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv23_fwd 1 1 squeezenet0_relu22_fwd squeezenet0_relu23_fwd 1 64 256 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedConvolution squeezenet0_conv24_fwd 1 1 squeezenet0_relu22_fwd squeezenet0_relu24_fwd 1 64 256 3 3 1 1 1 1 1 -1 1 1 1 ," +"QuantizedConcat squeezenet0_concat7 2 1 squeezenet0_relu23_fwd squeezenet0_relu24_fwd squeezenet0_concat7 1 ," +"QuantizedConvolution squeezenet0_conv25_fwd 1 1 squeezenet0_concat7 squeezenet0_relu25_fwd 1 512 1000 1 1 1 1 0 0 1 -1 1 1 1 ," +"QuantizedPooling 
squeezenet0_pool3_fwd 1 1 squeezenet0_relu25_fwd squeezenet0_pool3_fwd 1 13 13 13 13 0 0 -1 -1 -1 0 ," +"Reshape squeezenet0_flatten0_reshape0 1 1 squeezenet0_pool3_fwd squeezenet0_flatten0_reshape0 0 4 4 0 0 -1 1 0 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/resnet50.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/resnet50.tnnproto new file mode 100644 index 0000000..87d46dc --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/resnet50.tnnproto @@ -0,0 +1,128 @@ +"1 124 1 4206624770 ," +"input 1 3 224 224 ," +" conv1_Y conv1_relu_Y fc1000_Gemm_Y fc1000_Reshape_Y input pool1_Y pool5_Y prob_Y res2a_Y res2a_branch1_Y res2a_branch2a_Y res2a_branch2a_relu_Y res2a_branch2b_Y res2a_branch2b_relu_Y res2a_branch2c_Y res2a_relu_Y res2b_Y res2b_branch2a_Y res2b_branch2a_relu_Y res2b_branch2b_Y res2b_branch2b_relu_Y res2b_branch2c_Y res2b_relu_Y res2c_Y res2c_branch2a_Y res2c_branch2a_relu_Y res2c_branch2b_Y res2c_branch2b_relu_Y res2c_branch2c_Y res2c_relu_Y res3a_Y res3a_branch1_Y res3a_branch2a_Y res3a_branch2a_relu_Y res3a_branch2b_Y res3a_branch2b_relu_Y res3a_branch2c_Y res3a_relu_Y res3b_Y res3b_branch2a_Y res3b_branch2a_relu_Y res3b_branch2b_Y res3b_branch2b_relu_Y res3b_branch2c_Y res3b_relu_Y res3c_Y res3c_branch2a_Y res3c_branch2a_relu_Y res3c_branch2b_Y res3c_branch2b_relu_Y res3c_branch2c_Y res3c_relu_Y res3d_Y res3d_branch2a_Y res3d_branch2a_relu_Y res3d_branch2b_Y res3d_branch2b_relu_Y res3d_branch2c_Y res3d_relu_Y res4a_Y res4a_branch1_Y res4a_branch2a_Y res4a_branch2a_relu_Y res4a_branch2b_Y res4a_branch2b_relu_Y res4a_branch2c_Y res4a_relu_Y res4b_Y res4b_branch2a_Y res4b_branch2a_relu_Y res4b_branch2b_Y res4b_branch2b_relu_Y res4b_branch2c_Y res4b_relu_Y res4c_Y res4c_branch2a_Y res4c_branch2a_relu_Y res4c_branch2b_Y res4c_branch2b_relu_Y res4c_branch2c_Y res4c_relu_Y res4d_Y res4d_branch2a_Y res4d_branch2a_relu_Y res4d_branch2b_Y res4d_branch2b_relu_Y res4d_branch2c_Y res4d_relu_Y res4e_Y res4e_branch2a_Y res4e_branch2a_relu_Y 
res4e_branch2b_Y res4e_branch2b_relu_Y res4e_branch2c_Y res4e_relu_Y res4f_Y res4f_branch2a_Y res4f_branch2a_relu_Y res4f_branch2b_Y res4f_branch2b_relu_Y res4f_branch2c_Y res4f_relu_Y res5a_Y res5a_branch1_Y res5a_branch2a_Y res5a_branch2a_relu_Y res5a_branch2b_Y res5a_branch2b_relu_Y res5a_branch2c_Y res5a_relu_Y res5b_Y res5b_branch2a_Y res5b_branch2a_relu_Y res5b_branch2b_Y res5b_branch2b_relu_Y res5b_branch2c_Y res5b_relu_Y res5c_Y res5c_branch2a_Y res5c_branch2a_relu_Y res5c_branch2b_Y res5c_branch2b_relu_Y res5c_branch2c_Y res5c_relu_Y ," +"prob_Y ," +" 123 ," +"Convolution conv1 1 1 input conv1_Y 1 3 64 7 7 2 2 3 3 1 -1 1 1 ," +"ReLU conv1_relu 1 1 conv1_Y conv1_relu_Y ," +"Pooling pool1 1 1 conv1_relu_Y pool1_Y 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"Convolution res2a_branch1 1 1 pool1_Y res2a_branch1_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"Convolution res2a_branch2a 1 1 pool1_Y res2a_branch2a_Y 1 64 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res2a_branch2a_relu 1 1 res2a_branch2a_Y res2a_branch2a_relu_Y ," +"Convolution res2a_branch2b 1 1 res2a_branch2a_relu_Y res2a_branch2b_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res2a_branch2b_relu 1 1 res2a_branch2b_Y res2a_branch2b_relu_Y ," +"Convolution res2a_branch2c 1 1 res2a_branch2b_relu_Y res2a_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"Add res2a 2 1 res2a_branch1_Y res2a_branch2c_Y res2a_Y ," +"ReLU res2a_relu 1 1 res2a_Y res2a_relu_Y ," +"Convolution res2b_branch2a 1 1 res2a_relu_Y res2b_branch2a_Y 1 256 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res2b_branch2a_relu 1 1 res2b_branch2a_Y res2b_branch2a_relu_Y ," +"Convolution res2b_branch2b 1 1 res2b_branch2a_relu_Y res2b_branch2b_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res2b_branch2b_relu 1 1 res2b_branch2b_Y res2b_branch2b_relu_Y ," +"Convolution res2b_branch2c 1 1 res2b_branch2b_relu_Y res2b_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"Add res2b 2 1 res2a_relu_Y res2b_branch2c_Y res2b_Y ," +"ReLU res2b_relu 1 1 res2b_Y res2b_relu_Y ," +"Convolution res2c_branch2a 1 1 res2b_relu_Y 
res2c_branch2a_Y 1 256 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res2c_branch2a_relu 1 1 res2c_branch2a_Y res2c_branch2a_relu_Y ," +"Convolution res2c_branch2b 1 1 res2c_branch2a_relu_Y res2c_branch2b_Y 1 64 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res2c_branch2b_relu 1 1 res2c_branch2b_Y res2c_branch2b_relu_Y ," +"Convolution res2c_branch2c 1 1 res2c_branch2b_relu_Y res2c_branch2c_Y 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"Add res2c 2 1 res2b_relu_Y res2c_branch2c_Y res2c_Y ," +"ReLU res2c_relu 1 1 res2c_Y res2c_relu_Y ," +"Convolution res3a_branch1 1 1 res2c_relu_Y res3a_branch1_Y 1 256 512 1 1 2 2 0 0 1 -1 1 1 ," +"Convolution res3a_branch2a 1 1 res2c_relu_Y res3a_branch2a_Y 1 256 128 1 1 2 2 0 0 1 -1 1 1 ," +"ReLU res3a_branch2a_relu 1 1 res3a_branch2a_Y res3a_branch2a_relu_Y ," +"Convolution res3a_branch2b 1 1 res3a_branch2a_relu_Y res3a_branch2b_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res3a_branch2b_relu 1 1 res3a_branch2b_Y res3a_branch2b_relu_Y ," +"Convolution res3a_branch2c 1 1 res3a_branch2b_relu_Y res3a_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 ," +"Add res3a 2 1 res3a_branch1_Y res3a_branch2c_Y res3a_Y ," +"ReLU res3a_relu 1 1 res3a_Y res3a_relu_Y ," +"Convolution res3b_branch2a 1 1 res3a_relu_Y res3b_branch2a_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res3b_branch2a_relu 1 1 res3b_branch2a_Y res3b_branch2a_relu_Y ," +"Convolution res3b_branch2b 1 1 res3b_branch2a_relu_Y res3b_branch2b_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res3b_branch2b_relu 1 1 res3b_branch2b_Y res3b_branch2b_relu_Y ," +"Convolution res3b_branch2c 1 1 res3b_branch2b_relu_Y res3b_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 ," +"Add res3b 2 1 res3a_relu_Y res3b_branch2c_Y res3b_Y ," +"ReLU res3b_relu 1 1 res3b_Y res3b_relu_Y ," +"Convolution res3c_branch2a 1 1 res3b_relu_Y res3c_branch2a_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res3c_branch2a_relu 1 1 res3c_branch2a_Y res3c_branch2a_relu_Y ," +"Convolution res3c_branch2b 1 1 res3c_branch2a_relu_Y res3c_branch2b_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 ," 
+"ReLU res3c_branch2b_relu 1 1 res3c_branch2b_Y res3c_branch2b_relu_Y ," +"Convolution res3c_branch2c 1 1 res3c_branch2b_relu_Y res3c_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 ," +"Add res3c 2 1 res3b_relu_Y res3c_branch2c_Y res3c_Y ," +"ReLU res3c_relu 1 1 res3c_Y res3c_relu_Y ," +"Convolution res3d_branch2a 1 1 res3c_relu_Y res3d_branch2a_Y 1 512 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res3d_branch2a_relu 1 1 res3d_branch2a_Y res3d_branch2a_relu_Y ," +"Convolution res3d_branch2b 1 1 res3d_branch2a_relu_Y res3d_branch2b_Y 1 128 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res3d_branch2b_relu 1 1 res3d_branch2b_Y res3d_branch2b_relu_Y ," +"Convolution res3d_branch2c 1 1 res3d_branch2b_relu_Y res3d_branch2c_Y 1 128 512 1 1 1 1 0 0 1 -1 1 1 ," +"Add res3d 2 1 res3c_relu_Y res3d_branch2c_Y res3d_Y ," +"ReLU res3d_relu 1 1 res3d_Y res3d_relu_Y ," +"Convolution res4a_branch1 1 1 res3d_relu_Y res4a_branch1_Y 1 512 1024 1 1 2 2 0 0 1 -1 1 1 ," +"Convolution res4a_branch2a 1 1 res3d_relu_Y res4a_branch2a_Y 1 512 256 1 1 2 2 0 0 1 -1 1 1 ," +"ReLU res4a_branch2a_relu 1 1 res4a_branch2a_Y res4a_branch2a_relu_Y ," +"Convolution res4a_branch2b 1 1 res4a_branch2a_relu_Y res4a_branch2b_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4a_branch2b_relu 1 1 res4a_branch2b_Y res4a_branch2b_relu_Y ," +"Convolution res4a_branch2c 1 1 res4a_branch2b_relu_Y res4a_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4a 2 1 res4a_branch1_Y res4a_branch2c_Y res4a_Y ," +"ReLU res4a_relu 1 1 res4a_Y res4a_relu_Y ," +"Convolution res4b_branch2a 1 1 res4a_relu_Y res4b_branch2a_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res4b_branch2a_relu 1 1 res4b_branch2a_Y res4b_branch2a_relu_Y ," +"Convolution res4b_branch2b 1 1 res4b_branch2a_relu_Y res4b_branch2b_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4b_branch2b_relu 1 1 res4b_branch2b_Y res4b_branch2b_relu_Y ," +"Convolution res4b_branch2c 1 1 res4b_branch2b_relu_Y res4b_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4b 2 1 res4a_relu_Y 
res4b_branch2c_Y res4b_Y ," +"ReLU res4b_relu 1 1 res4b_Y res4b_relu_Y ," +"Convolution res4c_branch2a 1 1 res4b_relu_Y res4c_branch2a_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res4c_branch2a_relu 1 1 res4c_branch2a_Y res4c_branch2a_relu_Y ," +"Convolution res4c_branch2b 1 1 res4c_branch2a_relu_Y res4c_branch2b_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4c_branch2b_relu 1 1 res4c_branch2b_Y res4c_branch2b_relu_Y ," +"Convolution res4c_branch2c 1 1 res4c_branch2b_relu_Y res4c_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4c 2 1 res4b_relu_Y res4c_branch2c_Y res4c_Y ," +"ReLU res4c_relu 1 1 res4c_Y res4c_relu_Y ," +"Convolution res4d_branch2a 1 1 res4c_relu_Y res4d_branch2a_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res4d_branch2a_relu 1 1 res4d_branch2a_Y res4d_branch2a_relu_Y ," +"Convolution res4d_branch2b 1 1 res4d_branch2a_relu_Y res4d_branch2b_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4d_branch2b_relu 1 1 res4d_branch2b_Y res4d_branch2b_relu_Y ," +"Convolution res4d_branch2c 1 1 res4d_branch2b_relu_Y res4d_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4d 2 1 res4c_relu_Y res4d_branch2c_Y res4d_Y ," +"ReLU res4d_relu 1 1 res4d_Y res4d_relu_Y ," +"Convolution res4e_branch2a 1 1 res4d_relu_Y res4e_branch2a_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res4e_branch2a_relu 1 1 res4e_branch2a_Y res4e_branch2a_relu_Y ," +"Convolution res4e_branch2b 1 1 res4e_branch2a_relu_Y res4e_branch2b_Y 1 256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4e_branch2b_relu 1 1 res4e_branch2b_Y res4e_branch2b_relu_Y ," +"Convolution res4e_branch2c 1 1 res4e_branch2b_relu_Y res4e_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4e 2 1 res4d_relu_Y res4e_branch2c_Y res4e_Y ," +"ReLU res4e_relu 1 1 res4e_Y res4e_relu_Y ," +"Convolution res4f_branch2a 1 1 res4e_relu_Y res4f_branch2a_Y 1 1024 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res4f_branch2a_relu 1 1 res4f_branch2a_Y res4f_branch2a_relu_Y ," +"Convolution res4f_branch2b 1 1 res4f_branch2a_relu_Y res4f_branch2b_Y 1 
256 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res4f_branch2b_relu 1 1 res4f_branch2b_Y res4f_branch2b_relu_Y ," +"Convolution res4f_branch2c 1 1 res4f_branch2b_relu_Y res4f_branch2c_Y 1 256 1024 1 1 1 1 0 0 1 -1 1 1 ," +"Add res4f 2 1 res4e_relu_Y res4f_branch2c_Y res4f_Y ," +"ReLU res4f_relu 1 1 res4f_Y res4f_relu_Y ," +"Convolution res5a_branch1 1 1 res4f_relu_Y res5a_branch1_Y 1 1024 2048 1 1 2 2 0 0 1 -1 1 1 ," +"Convolution res5a_branch2a 1 1 res4f_relu_Y res5a_branch2a_Y 1 1024 512 1 1 2 2 0 0 1 -1 1 1 ," +"ReLU res5a_branch2a_relu 1 1 res5a_branch2a_Y res5a_branch2a_relu_Y ," +"Convolution res5a_branch2b 1 1 res5a_branch2a_relu_Y res5a_branch2b_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res5a_branch2b_relu 1 1 res5a_branch2b_Y res5a_branch2b_relu_Y ," +"Convolution res5a_branch2c 1 1 res5a_branch2b_relu_Y res5a_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 ," +"Add res5a 2 1 res5a_branch1_Y res5a_branch2c_Y res5a_Y ," +"ReLU res5a_relu 1 1 res5a_Y res5a_relu_Y ," +"Convolution res5b_branch2a 1 1 res5a_relu_Y res5b_branch2a_Y 1 2048 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res5b_branch2a_relu 1 1 res5b_branch2a_Y res5b_branch2a_relu_Y ," +"Convolution res5b_branch2b 1 1 res5b_branch2a_relu_Y res5b_branch2b_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res5b_branch2b_relu 1 1 res5b_branch2b_Y res5b_branch2b_relu_Y ," +"Convolution res5b_branch2c 1 1 res5b_branch2b_relu_Y res5b_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 ," +"Add res5b 2 1 res5a_relu_Y res5b_branch2c_Y res5b_Y ," +"ReLU res5b_relu 1 1 res5b_Y res5b_relu_Y ," +"Convolution res5c_branch2a 1 1 res5b_relu_Y res5c_branch2a_Y 1 2048 512 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU res5c_branch2a_relu 1 1 res5c_branch2a_Y res5c_branch2a_relu_Y ," +"Convolution res5c_branch2b 1 1 res5c_branch2a_relu_Y res5c_branch2b_Y 1 512 512 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU res5c_branch2b_relu 1 1 res5c_branch2b_Y res5c_branch2b_relu_Y ," +"Convolution res5c_branch2c 1 1 res5c_branch2b_relu_Y res5c_branch2c_Y 1 512 2048 1 1 1 1 0 0 1 -1 1 1 ," +"Add 
res5c 2 1 res5b_relu_Y res5c_branch2c_Y res5c_Y ," +"ReLU res5c_relu 1 1 res5c_Y res5c_relu_Y ," +"Pooling pool5 1 1 res5c_relu_Y pool5_Y 1 7 7 1 1 0 0 -1 -1 -1 0 ," +"Reshape fc1000_Reshape 1 1 pool5_Y fc1000_Reshape_Y 0 4 4 0 2048 1 1 ," +"InnerProduct fc1000_Gemm 1 1 fc1000_Reshape_Y fc1000_Gemm_Y 1000 1 0 1 ," +"SoftmaxCaffe prob 1 1 fc1000_Gemm_Y prob_Y 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/shufflenet_v2.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/shufflenet_v2.tnnproto new file mode 100644 index 0000000..018e4e9 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/shufflenet_v2.tnnproto @@ -0,0 +1,160 @@ +"1 156 1 4206624770 ," +"input_input 1 3 224 224 ," +" conv5 conv5/relu data fc input_input pool stage1/conv stage1/conv/relu stage1/pool stage_2_1/concat stage_2_1/conv1 stage_2_1/conv1/relu stage_2_1/conv2 stage_2_1/conv3 stage_2_1/conv3/relu stage_2_1/conv4 stage_2_1/conv5 stage_2_1/conv5/relu stage_2_1/shuffle stage_2_2/concat stage_2_2/conv1 stage_2_2/conv1/relu stage_2_2/conv2 stage_2_2/conv3 stage_2_2/conv3/relu stage_2_2/shuffle stage_2_2/slice1 stage_2_2/slice2 stage_2_3/concat stage_2_3/conv1 stage_2_3/conv1/relu stage_2_3/conv2 stage_2_3/conv3 stage_2_3/conv3/relu stage_2_3/shuffle stage_2_3/slice1 stage_2_3/slice2 stage_2_4/concat stage_2_4/conv1 stage_2_4/conv1/relu stage_2_4/conv2 stage_2_4/conv3 stage_2_4/conv3/relu stage_2_4/shuffle stage_2_4/slice1 stage_2_4/slice2 stage_3_1/concat stage_3_1/conv1 stage_3_1/conv1/relu stage_3_1/conv2 stage_3_1/conv3 stage_3_1/conv3/relu stage_3_1/conv4 stage_3_1/conv5 stage_3_1/conv5/relu stage_3_1/shuffle stage_3_2/concat stage_3_2/conv1 stage_3_2/conv1/relu stage_3_2/conv2 stage_3_2/conv3 stage_3_2/conv3/relu stage_3_2/shuffle stage_3_2/slice1 stage_3_2/slice2 stage_3_3/concat stage_3_3/conv1 stage_3_3/conv1/relu stage_3_3/conv2 stage_3_3/conv3 stage_3_3/conv3/relu stage_3_3/shuffle stage_3_3/slice1 stage_3_3/slice2 stage_3_4/concat stage_3_4/conv1 stage_3_4/conv1/relu 
stage_3_4/conv2 stage_3_4/conv3 stage_3_4/conv3/relu stage_3_4/shuffle stage_3_4/slice1 stage_3_4/slice2 stage_3_5/concat stage_3_5/conv1 stage_3_5/conv1/relu stage_3_5/conv2 stage_3_5/conv3 stage_3_5/conv3/relu stage_3_5/shuffle stage_3_5/slice1 stage_3_5/slice2 stage_3_6/concat stage_3_6/conv1 stage_3_6/conv1/relu stage_3_6/conv2 stage_3_6/conv3 stage_3_6/conv3/relu stage_3_6/shuffle stage_3_6/slice1 stage_3_6/slice2 stage_3_7/concat stage_3_7/conv1 stage_3_7/conv1/relu stage_3_7/conv2 stage_3_7/conv3 stage_3_7/conv3/relu stage_3_7/shuffle stage_3_7/slice1 stage_3_7/slice2 stage_3_8/concat stage_3_8/conv1 stage_3_8/conv1/relu stage_3_8/conv2 stage_3_8/conv3 stage_3_8/conv3/relu stage_3_8/shuffle stage_3_8/slice1 stage_3_8/slice2 stage_4_1/concat stage_4_1/conv1 stage_4_1/conv1/relu stage_4_1/conv2 stage_4_1/conv3 stage_4_1/conv3/relu stage_4_1/conv4 stage_4_1/conv5 stage_4_1/conv5/relu stage_4_1/shuffle stage_4_2/concat stage_4_2/conv1 stage_4_2/conv1/relu stage_4_2/conv2 stage_4_2/conv3 stage_4_2/conv3/relu stage_4_2/shuffle stage_4_2/slice1 stage_4_2/slice2 stage_4_3/concat stage_4_3/conv1 stage_4_3/conv1/relu stage_4_3/conv2 stage_4_3/conv3 stage_4_3/conv3/relu stage_4_3/shuffle stage_4_3/slice1 stage_4_3/slice2 stage_4_4/concat stage_4_4/conv1 stage_4_4/conv1/relu stage_4_4/conv2 stage_4_4/conv3 stage_4_4/conv3/relu stage_4_4/shuffle stage_4_4/slice1 stage_4_4/slice2 ," +"fc ," +" 155 ," +"BatchNormCxx data/bn 1 1 input_input data ," +"Convolution stage1/conv 1 1 data stage1/conv 1 3 24 3 3 2 2 1 1 1 -1 1 1 ," +"ReLU stage1/conv/relu 1 1 stage1/conv stage1/conv/relu ," +"Pooling stage1/pool 1 1 stage1/conv/relu stage1/pool 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"Convolution stage_2_1/conv4 1 1 stage1/pool stage_2_1/conv4 24 1 24 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_2_1/conv5 1 1 stage_2_1/conv4 stage_2_1/conv5 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_1/conv5/relu 1 1 stage_2_1/conv5 stage_2_1/conv5/relu ," +"Convolution stage_2_1/conv1 1 1 stage1/pool 
stage_2_1/conv1 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_1/conv1/relu 1 1 stage_2_1/conv1 stage_2_1/conv1/relu ," +"Convolution stage_2_1/conv2 1 1 stage_2_1/conv1/relu stage_2_1/conv2 24 1 24 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_2_1/conv3 1 1 stage_2_1/conv2 stage_2_1/conv3 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_1/conv3/relu 1 1 stage_2_1/conv3 stage_2_1/conv3/relu ," +"Concat stage_2_1/concat 2 1 stage_2_1/conv5/relu stage_2_1/conv3/relu stage_2_1/concat 1 ," +"ShuffleChannel stage_2_1/shuffle 1 1 stage_2_1/concat stage_2_1/shuffle 2 ," +"StridedSlice stage_2_2/slice1 1 1 stage_2_1/shuffle stage_2_2/slice1 4 0 0 0 0 4 0 24 0 0 4 1 1 1 1 ," +"StridedSlice stage_2_2/slice2 1 1 stage_2_1/shuffle stage_2_2/slice2 4 0 24 0 0 4 0 48 0 0 4 1 1 1 1 ," +"Convolution stage_2_2/conv1 1 1 stage_2_2/slice2 stage_2_2/conv1 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_2/conv1/relu 1 1 stage_2_2/conv1 stage_2_2/conv1/relu ," +"Convolution stage_2_2/conv2 1 1 stage_2_2/conv1/relu stage_2_2/conv2 24 1 24 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_2_2/conv3 1 1 stage_2_2/conv2 stage_2_2/conv3 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_2/conv3/relu 1 1 stage_2_2/conv3 stage_2_2/conv3/relu ," +"Concat stage_2_2/concat 2 1 stage_2_2/slice1 stage_2_2/conv3/relu stage_2_2/concat 1 ," +"ShuffleChannel stage_2_2/shuffle 1 1 stage_2_2/concat stage_2_2/shuffle 2 ," +"StridedSlice stage_2_3/slice1 1 1 stage_2_2/shuffle stage_2_3/slice1 4 0 0 0 0 4 0 24 0 0 4 1 1 1 1 ," +"StridedSlice stage_2_3/slice2 1 1 stage_2_2/shuffle stage_2_3/slice2 4 0 24 0 0 4 0 48 0 0 4 1 1 1 1 ," +"Convolution stage_2_3/conv1 1 1 stage_2_3/slice2 stage_2_3/conv1 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_3/conv1/relu 1 1 stage_2_3/conv1 stage_2_3/conv1/relu ," +"Convolution stage_2_3/conv2 1 1 stage_2_3/conv1/relu stage_2_3/conv2 24 1 24 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_2_3/conv3 1 1 stage_2_3/conv2 stage_2_3/conv3 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU 
stage_2_3/conv3/relu 1 1 stage_2_3/conv3 stage_2_3/conv3/relu ," +"Concat stage_2_3/concat 2 1 stage_2_3/slice1 stage_2_3/conv3/relu stage_2_3/concat 1 ," +"ShuffleChannel stage_2_3/shuffle 1 1 stage_2_3/concat stage_2_3/shuffle 2 ," +"StridedSlice stage_2_4/slice1 1 1 stage_2_3/shuffle stage_2_4/slice1 4 0 0 0 0 4 0 24 0 0 4 1 1 1 1 ," +"StridedSlice stage_2_4/slice2 1 1 stage_2_3/shuffle stage_2_4/slice2 4 0 24 0 0 4 0 48 0 0 4 1 1 1 1 ," +"Convolution stage_2_4/conv1 1 1 stage_2_4/slice2 stage_2_4/conv1 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_4/conv1/relu 1 1 stage_2_4/conv1 stage_2_4/conv1/relu ," +"Convolution stage_2_4/conv2 1 1 stage_2_4/conv1/relu stage_2_4/conv2 24 1 24 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_2_4/conv3 1 1 stage_2_4/conv2 stage_2_4/conv3 1 24 24 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_2_4/conv3/relu 1 1 stage_2_4/conv3 stage_2_4/conv3/relu ," +"Concat stage_2_4/concat 2 1 stage_2_4/slice1 stage_2_4/conv3/relu stage_2_4/concat 1 ," +"ShuffleChannel stage_2_4/shuffle 1 1 stage_2_4/concat stage_2_4/shuffle 2 ," +"Convolution stage_3_1/conv4 1 1 stage_2_4/shuffle stage_3_1/conv4 48 1 48 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_3_1/conv5 1 1 stage_3_1/conv4 stage_3_1/conv5 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_1/conv5/relu 1 1 stage_3_1/conv5 stage_3_1/conv5/relu ," +"Convolution stage_3_1/conv1 1 1 stage_2_4/shuffle stage_3_1/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_1/conv1/relu 1 1 stage_3_1/conv1 stage_3_1/conv1/relu ," +"Convolution stage_3_1/conv2 1 1 stage_3_1/conv1/relu stage_3_1/conv2 48 1 48 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_3_1/conv3 1 1 stage_3_1/conv2 stage_3_1/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_1/conv3/relu 1 1 stage_3_1/conv3 stage_3_1/conv3/relu ," +"Concat stage_3_1/concat 2 1 stage_3_1/conv5/relu stage_3_1/conv3/relu stage_3_1/concat 1 ," +"ShuffleChannel stage_3_1/shuffle 1 1 stage_3_1/concat stage_3_1/shuffle 2 ," +"StridedSlice stage_3_2/slice1 1 1 stage_3_1/shuffle 
stage_3_2/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_2/slice2 1 1 stage_3_1/shuffle stage_3_2/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_2/conv1 1 1 stage_3_2/slice2 stage_3_2/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_2/conv1/relu 1 1 stage_3_2/conv1 stage_3_2/conv1/relu ," +"Convolution stage_3_2/conv2 1 1 stage_3_2/conv1/relu stage_3_2/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_2/conv3 1 1 stage_3_2/conv2 stage_3_2/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_2/conv3/relu 1 1 stage_3_2/conv3 stage_3_2/conv3/relu ," +"Concat stage_3_2/concat 2 1 stage_3_2/slice1 stage_3_2/conv3/relu stage_3_2/concat 1 ," +"ShuffleChannel stage_3_2/shuffle 1 1 stage_3_2/concat stage_3_2/shuffle 2 ," +"StridedSlice stage_3_3/slice1 1 1 stage_3_2/shuffle stage_3_3/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_3/slice2 1 1 stage_3_2/shuffle stage_3_3/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_3/conv1 1 1 stage_3_3/slice2 stage_3_3/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_3/conv1/relu 1 1 stage_3_3/conv1 stage_3_3/conv1/relu ," +"Convolution stage_3_3/conv2 1 1 stage_3_3/conv1/relu stage_3_3/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_3/conv3 1 1 stage_3_3/conv2 stage_3_3/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_3/conv3/relu 1 1 stage_3_3/conv3 stage_3_3/conv3/relu ," +"Concat stage_3_3/concat 2 1 stage_3_3/slice1 stage_3_3/conv3/relu stage_3_3/concat 1 ," +"ShuffleChannel stage_3_3/shuffle 1 1 stage_3_3/concat stage_3_3/shuffle 2 ," +"StridedSlice stage_3_4/slice1 1 1 stage_3_3/shuffle stage_3_4/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_4/slice2 1 1 stage_3_3/shuffle stage_3_4/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_4/conv1 1 1 stage_3_4/slice2 stage_3_4/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_4/conv1/relu 1 1 stage_3_4/conv1 stage_3_4/conv1/relu ," +"Convolution 
stage_3_4/conv2 1 1 stage_3_4/conv1/relu stage_3_4/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_4/conv3 1 1 stage_3_4/conv2 stage_3_4/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_4/conv3/relu 1 1 stage_3_4/conv3 stage_3_4/conv3/relu ," +"Concat stage_3_4/concat 2 1 stage_3_4/slice1 stage_3_4/conv3/relu stage_3_4/concat 1 ," +"ShuffleChannel stage_3_4/shuffle 1 1 stage_3_4/concat stage_3_4/shuffle 2 ," +"StridedSlice stage_3_5/slice1 1 1 stage_3_4/shuffle stage_3_5/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_5/slice2 1 1 stage_3_4/shuffle stage_3_5/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_5/conv1 1 1 stage_3_5/slice2 stage_3_5/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_5/conv1/relu 1 1 stage_3_5/conv1 stage_3_5/conv1/relu ," +"Convolution stage_3_5/conv2 1 1 stage_3_5/conv1/relu stage_3_5/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_5/conv3 1 1 stage_3_5/conv2 stage_3_5/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_5/conv3/relu 1 1 stage_3_5/conv3 stage_3_5/conv3/relu ," +"Concat stage_3_5/concat 2 1 stage_3_5/slice1 stage_3_5/conv3/relu stage_3_5/concat 1 ," +"ShuffleChannel stage_3_5/shuffle 1 1 stage_3_5/concat stage_3_5/shuffle 2 ," +"StridedSlice stage_3_6/slice1 1 1 stage_3_5/shuffle stage_3_6/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_6/slice2 1 1 stage_3_5/shuffle stage_3_6/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_6/conv1 1 1 stage_3_6/slice2 stage_3_6/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_6/conv1/relu 1 1 stage_3_6/conv1 stage_3_6/conv1/relu ," +"Convolution stage_3_6/conv2 1 1 stage_3_6/conv1/relu stage_3_6/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_6/conv3 1 1 stage_3_6/conv2 stage_3_6/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_6/conv3/relu 1 1 stage_3_6/conv3 stage_3_6/conv3/relu ," +"Concat stage_3_6/concat 2 1 stage_3_6/slice1 stage_3_6/conv3/relu stage_3_6/concat 1 ," 
+"ShuffleChannel stage_3_6/shuffle 1 1 stage_3_6/concat stage_3_6/shuffle 2 ," +"StridedSlice stage_3_7/slice1 1 1 stage_3_6/shuffle stage_3_7/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_7/slice2 1 1 stage_3_6/shuffle stage_3_7/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_7/conv1 1 1 stage_3_7/slice2 stage_3_7/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_7/conv1/relu 1 1 stage_3_7/conv1 stage_3_7/conv1/relu ," +"Convolution stage_3_7/conv2 1 1 stage_3_7/conv1/relu stage_3_7/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_7/conv3 1 1 stage_3_7/conv2 stage_3_7/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_7/conv3/relu 1 1 stage_3_7/conv3 stage_3_7/conv3/relu ," +"Concat stage_3_7/concat 2 1 stage_3_7/slice1 stage_3_7/conv3/relu stage_3_7/concat 1 ," +"ShuffleChannel stage_3_7/shuffle 1 1 stage_3_7/concat stage_3_7/shuffle 2 ," +"StridedSlice stage_3_8/slice1 1 1 stage_3_7/shuffle stage_3_8/slice1 4 0 0 0 0 4 0 48 0 0 4 1 1 1 1 ," +"StridedSlice stage_3_8/slice2 1 1 stage_3_7/shuffle stage_3_8/slice2 4 0 48 0 0 4 0 96 0 0 4 1 1 1 1 ," +"Convolution stage_3_8/conv1 1 1 stage_3_8/slice2 stage_3_8/conv1 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_8/conv1/relu 1 1 stage_3_8/conv1 stage_3_8/conv1/relu ," +"Convolution stage_3_8/conv2 1 1 stage_3_8/conv1/relu stage_3_8/conv2 48 1 48 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_3_8/conv3 1 1 stage_3_8/conv2 stage_3_8/conv3 1 48 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_3_8/conv3/relu 1 1 stage_3_8/conv3 stage_3_8/conv3/relu ," +"Concat stage_3_8/concat 2 1 stage_3_8/slice1 stage_3_8/conv3/relu stage_3_8/concat 1 ," +"ShuffleChannel stage_3_8/shuffle 1 1 stage_3_8/concat stage_3_8/shuffle 2 ," +"Convolution stage_4_1/conv4 1 1 stage_3_8/shuffle stage_4_1/conv4 96 1 96 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_4_1/conv5 1 1 stage_4_1/conv4 stage_4_1/conv5 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_1/conv5/relu 1 1 stage_4_1/conv5 stage_4_1/conv5/relu ," 
+"Convolution stage_4_1/conv1 1 1 stage_3_8/shuffle stage_4_1/conv1 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_1/conv1/relu 1 1 stage_4_1/conv1 stage_4_1/conv1/relu ," +"Convolution stage_4_1/conv2 1 1 stage_4_1/conv1/relu stage_4_1/conv2 96 1 96 3 3 2 2 1 1 1 -1 1 1 ," +"Convolution stage_4_1/conv3 1 1 stage_4_1/conv2 stage_4_1/conv3 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_1/conv3/relu 1 1 stage_4_1/conv3 stage_4_1/conv3/relu ," +"Concat stage_4_1/concat 2 1 stage_4_1/conv5/relu stage_4_1/conv3/relu stage_4_1/concat 1 ," +"ShuffleChannel stage_4_1/shuffle 1 1 stage_4_1/concat stage_4_1/shuffle 2 ," +"StridedSlice stage_4_2/slice1 1 1 stage_4_1/shuffle stage_4_2/slice1 4 0 0 0 0 4 0 96 0 0 4 1 1 1 1 ," +"StridedSlice stage_4_2/slice2 1 1 stage_4_1/shuffle stage_4_2/slice2 4 0 96 0 0 4 0 192 0 0 4 1 1 1 1 ," +"Convolution stage_4_2/conv1 1 1 stage_4_2/slice2 stage_4_2/conv1 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_2/conv1/relu 1 1 stage_4_2/conv1 stage_4_2/conv1/relu ," +"Convolution stage_4_2/conv2 1 1 stage_4_2/conv1/relu stage_4_2/conv2 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_4_2/conv3 1 1 stage_4_2/conv2 stage_4_2/conv3 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_2/conv3/relu 1 1 stage_4_2/conv3 stage_4_2/conv3/relu ," +"Concat stage_4_2/concat 2 1 stage_4_2/slice1 stage_4_2/conv3/relu stage_4_2/concat 1 ," +"ShuffleChannel stage_4_2/shuffle 1 1 stage_4_2/concat stage_4_2/shuffle 2 ," +"StridedSlice stage_4_3/slice1 1 1 stage_4_2/shuffle stage_4_3/slice1 4 0 0 0 0 4 0 96 0 0 4 1 1 1 1 ," +"StridedSlice stage_4_3/slice2 1 1 stage_4_2/shuffle stage_4_3/slice2 4 0 96 0 0 4 0 192 0 0 4 1 1 1 1 ," +"Convolution stage_4_3/conv1 1 1 stage_4_3/slice2 stage_4_3/conv1 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_3/conv1/relu 1 1 stage_4_3/conv1 stage_4_3/conv1/relu ," +"Convolution stage_4_3/conv2 1 1 stage_4_3/conv1/relu stage_4_3/conv2 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_4_3/conv3 1 1 stage_4_3/conv2 stage_4_3/conv3 1 96 
96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_3/conv3/relu 1 1 stage_4_3/conv3 stage_4_3/conv3/relu ," +"Concat stage_4_3/concat 2 1 stage_4_3/slice1 stage_4_3/conv3/relu stage_4_3/concat 1 ," +"ShuffleChannel stage_4_3/shuffle 1 1 stage_4_3/concat stage_4_3/shuffle 2 ," +"StridedSlice stage_4_4/slice1 1 1 stage_4_3/shuffle stage_4_4/slice1 4 0 0 0 0 4 0 96 0 0 4 1 1 1 1 ," +"StridedSlice stage_4_4/slice2 1 1 stage_4_3/shuffle stage_4_4/slice2 4 0 96 0 0 4 0 192 0 0 4 1 1 1 1 ," +"Convolution stage_4_4/conv1 1 1 stage_4_4/slice2 stage_4_4/conv1 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_4/conv1/relu 1 1 stage_4_4/conv1 stage_4_4/conv1/relu ," +"Convolution stage_4_4/conv2 1 1 stage_4_4/conv1/relu stage_4_4/conv2 96 1 96 3 3 1 1 1 1 1 -1 1 1 ," +"Convolution stage_4_4/conv3 1 1 stage_4_4/conv2 stage_4_4/conv3 1 96 96 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU stage_4_4/conv3/relu 1 1 stage_4_4/conv3 stage_4_4/conv3/relu ," +"Concat stage_4_4/concat 2 1 stage_4_4/slice1 stage_4_4/conv3/relu stage_4_4/concat 1 ," +"ShuffleChannel stage_4_4/shuffle 1 1 stage_4_4/concat stage_4_4/shuffle 2 ," +"Convolution conv5 1 1 stage_4_4/shuffle conv5 1 192 1024 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU conv5/relu 1 1 conv5 conv5/relu ," +"Pooling pool 1 1 conv5/relu pool 1 7 7 1 1 0 0 -1 -1 -1 1 ," +"Convolution fc 1 1 pool fc 1 1024 1000 1 1 1 1 0 0 1 -1 1 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.0.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.0.tnnproto new file mode 100644 index 0000000..706f149 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.0.tnnproto @@ -0,0 +1,70 @@ +"1 66 1 4206624770 ," +"input 1 3 227 227 ," +" conv1 conv10 fire2/concat fire2/expand1x1 fire2/expand3x3 fire2/relu_expand1x1 fire2/relu_expand3x3 fire2/relu_squeeze1x1 fire2/squeeze1x1 fire3/concat fire3/expand1x1 fire3/expand3x3 fire3/relu_expand1x1 fire3/relu_expand3x3 fire3/relu_squeeze1x1 fire3/squeeze1x1 fire4/concat fire4/expand1x1 fire4/expand3x3 
fire4/relu_expand1x1 fire4/relu_expand3x3 fire4/relu_squeeze1x1 fire4/squeeze1x1 fire5/concat fire5/expand1x1 fire5/expand3x3 fire5/relu_expand1x1 fire5/relu_expand3x3 fire5/relu_squeeze1x1 fire5/squeeze1x1 fire6/concat fire6/expand1x1 fire6/expand3x3 fire6/relu_expand1x1 fire6/relu_expand3x3 fire6/relu_squeeze1x1 fire6/squeeze1x1 fire7/concat fire7/expand1x1 fire7/expand3x3 fire7/relu_expand1x1 fire7/relu_expand3x3 fire7/relu_squeeze1x1 fire7/squeeze1x1 fire8/concat fire8/expand1x1 fire8/expand3x3 fire8/relu_expand1x1 fire8/relu_expand3x3 fire8/relu_squeeze1x1 fire8/squeeze1x1 fire9/concat fire9/expand1x1 fire9/expand3x3 fire9/relu_expand1x1 fire9/relu_expand3x3 fire9/relu_squeeze1x1 fire9/squeeze1x1 input pool1 pool10 pool4 pool8 prob relu_conv1 relu_conv10 ," +"prob ," +" 65 ," +"Convolution conv1 1 1 input conv1 1 3 96 7 7 2 2 0 0 1 -1 1 1 ," +"ReLU relu_conv1 1 1 conv1 relu_conv1 ," +"Pooling pool1 1 1 relu_conv1 pool1 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"Convolution fire2/squeeze1x1 1 1 pool1 fire2/squeeze1x1 1 96 16 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire2/relu_squeeze1x1 1 1 fire2/squeeze1x1 fire2/relu_squeeze1x1 ," +"Convolution fire2/expand1x1 1 1 fire2/relu_squeeze1x1 fire2/expand1x1 1 16 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire2/relu_expand1x1 1 1 fire2/expand1x1 fire2/relu_expand1x1 ," +"Convolution fire2/expand3x3 1 1 fire2/relu_squeeze1x1 fire2/expand3x3 1 16 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire2/relu_expand3x3 1 1 fire2/expand3x3 fire2/relu_expand3x3 ," +"Concat fire2/concat 2 1 fire2/relu_expand1x1 fire2/relu_expand3x3 fire2/concat 1 ," +"Convolution fire3/squeeze1x1 1 1 fire2/concat fire3/squeeze1x1 1 128 16 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire3/relu_squeeze1x1 1 1 fire3/squeeze1x1 fire3/relu_squeeze1x1 ," +"Convolution fire3/expand1x1 1 1 fire3/relu_squeeze1x1 fire3/expand1x1 1 16 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire3/relu_expand1x1 1 1 fire3/expand1x1 fire3/relu_expand1x1 ," +"Convolution fire3/expand3x3 1 1 fire3/relu_squeeze1x1 fire3/expand3x3 1 16 64 3 
3 1 1 1 1 1 -1 1 1 ," +"ReLU fire3/relu_expand3x3 1 1 fire3/expand3x3 fire3/relu_expand3x3 ," +"Concat fire3/concat 2 1 fire3/relu_expand1x1 fire3/relu_expand3x3 fire3/concat 1 ," +"Convolution fire4/squeeze1x1 1 1 fire3/concat fire4/squeeze1x1 1 128 32 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire4/relu_squeeze1x1 1 1 fire4/squeeze1x1 fire4/relu_squeeze1x1 ," +"Convolution fire4/expand1x1 1 1 fire4/relu_squeeze1x1 fire4/expand1x1 1 32 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire4/relu_expand1x1 1 1 fire4/expand1x1 fire4/relu_expand1x1 ," +"Convolution fire4/expand3x3 1 1 fire4/relu_squeeze1x1 fire4/expand3x3 1 32 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire4/relu_expand3x3 1 1 fire4/expand3x3 fire4/relu_expand3x3 ," +"Concat fire4/concat 2 1 fire4/relu_expand1x1 fire4/relu_expand3x3 fire4/concat 1 ," +"Pooling pool4 1 1 fire4/concat pool4 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"Convolution fire5/squeeze1x1 1 1 pool4 fire5/squeeze1x1 1 256 32 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire5/relu_squeeze1x1 1 1 fire5/squeeze1x1 fire5/relu_squeeze1x1 ," +"Convolution fire5/expand1x1 1 1 fire5/relu_squeeze1x1 fire5/expand1x1 1 32 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire5/relu_expand1x1 1 1 fire5/expand1x1 fire5/relu_expand1x1 ," +"Convolution fire5/expand3x3 1 1 fire5/relu_squeeze1x1 fire5/expand3x3 1 32 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire5/relu_expand3x3 1 1 fire5/expand3x3 fire5/relu_expand3x3 ," +"Concat fire5/concat 2 1 fire5/relu_expand1x1 fire5/relu_expand3x3 fire5/concat 1 ," +"Convolution fire6/squeeze1x1 1 1 fire5/concat fire6/squeeze1x1 1 256 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire6/relu_squeeze1x1 1 1 fire6/squeeze1x1 fire6/relu_squeeze1x1 ," +"Convolution fire6/expand1x1 1 1 fire6/relu_squeeze1x1 fire6/expand1x1 1 48 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire6/relu_expand1x1 1 1 fire6/expand1x1 fire6/relu_expand1x1 ," +"Convolution fire6/expand3x3 1 1 fire6/relu_squeeze1x1 fire6/expand3x3 1 48 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire6/relu_expand3x3 1 1 fire6/expand3x3 fire6/relu_expand3x3 ," 
+"Concat fire6/concat 2 1 fire6/relu_expand1x1 fire6/relu_expand3x3 fire6/concat 1 ," +"Convolution fire7/squeeze1x1 1 1 fire6/concat fire7/squeeze1x1 1 384 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire7/relu_squeeze1x1 1 1 fire7/squeeze1x1 fire7/relu_squeeze1x1 ," +"Convolution fire7/expand1x1 1 1 fire7/relu_squeeze1x1 fire7/expand1x1 1 48 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire7/relu_expand1x1 1 1 fire7/expand1x1 fire7/relu_expand1x1 ," +"Convolution fire7/expand3x3 1 1 fire7/relu_squeeze1x1 fire7/expand3x3 1 48 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire7/relu_expand3x3 1 1 fire7/expand3x3 fire7/relu_expand3x3 ," +"Concat fire7/concat 2 1 fire7/relu_expand1x1 fire7/relu_expand3x3 fire7/concat 1 ," +"Convolution fire8/squeeze1x1 1 1 fire7/concat fire8/squeeze1x1 1 384 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire8/relu_squeeze1x1 1 1 fire8/squeeze1x1 fire8/relu_squeeze1x1 ," +"Convolution fire8/expand1x1 1 1 fire8/relu_squeeze1x1 fire8/expand1x1 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire8/relu_expand1x1 1 1 fire8/expand1x1 fire8/relu_expand1x1 ," +"Convolution fire8/expand3x3 1 1 fire8/relu_squeeze1x1 fire8/expand3x3 1 64 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire8/relu_expand3x3 1 1 fire8/expand3x3 fire8/relu_expand3x3 ," +"Concat fire8/concat 2 1 fire8/relu_expand1x1 fire8/relu_expand3x3 fire8/concat 1 ," +"Pooling pool8 1 1 fire8/concat pool8 0 3 3 2 2 0 0 -1 -1 -1 1 ," +"Convolution fire9/squeeze1x1 1 1 pool8 fire9/squeeze1x1 1 512 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire9/relu_squeeze1x1 1 1 fire9/squeeze1x1 fire9/relu_squeeze1x1 ," +"Convolution fire9/expand1x1 1 1 fire9/relu_squeeze1x1 fire9/expand1x1 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU fire9/relu_expand1x1 1 1 fire9/expand1x1 fire9/relu_expand1x1 ," +"Convolution fire9/expand3x3 1 1 fire9/relu_squeeze1x1 fire9/expand3x3 1 64 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU fire9/relu_expand3x3 1 1 fire9/expand3x3 fire9/relu_expand3x3 ," +"Concat fire9/concat 2 1 fire9/relu_expand1x1 fire9/relu_expand3x3 fire9/concat 1 ," +"Convolution 
conv10 1 1 fire9/concat conv10 1 512 1000 1 1 1 1 1 1 1 -1 1 1 ," +"ReLU relu_conv10 1 1 conv10 relu_conv10 ," +"Pooling pool10 1 1 relu_conv10 pool10 1 0 0 1 1 0 0 -1 -1 -1 0 ," +"SoftmaxCaffe prob 1 1 pool10 prob 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.1.tnnproto b/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.1.tnnproto new file mode 100644 index 0000000..bc87108 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark-model/squeezenet_v1.1.tnnproto @@ -0,0 +1,70 @@ +"1 67 1 4206624770 ," +"data 1 3 224 224 ," +" data squeezenet0_concat0 squeezenet0_concat1 squeezenet0_concat2 squeezenet0_concat3 squeezenet0_concat4 squeezenet0_concat5 squeezenet0_concat6 squeezenet0_concat7 squeezenet0_conv0_fwd squeezenet0_conv10_fwd squeezenet0_conv11_fwd squeezenet0_conv12_fwd squeezenet0_conv13_fwd squeezenet0_conv14_fwd squeezenet0_conv15_fwd squeezenet0_conv16_fwd squeezenet0_conv17_fwd squeezenet0_conv18_fwd squeezenet0_conv19_fwd squeezenet0_conv1_fwd squeezenet0_conv20_fwd squeezenet0_conv21_fwd squeezenet0_conv22_fwd squeezenet0_conv23_fwd squeezenet0_conv24_fwd squeezenet0_conv25_fwd squeezenet0_conv2_fwd squeezenet0_conv3_fwd squeezenet0_conv4_fwd squeezenet0_conv5_fwd squeezenet0_conv6_fwd squeezenet0_conv7_fwd squeezenet0_conv8_fwd squeezenet0_conv9_fwd squeezenet0_dropout0_fwd squeezenet0_flatten0_reshape0 squeezenet0_pool0_fwd squeezenet0_pool1_fwd squeezenet0_pool2_fwd squeezenet0_pool3_fwd squeezenet0_relu0_fwd squeezenet0_relu10_fwd squeezenet0_relu11_fwd squeezenet0_relu12_fwd squeezenet0_relu13_fwd squeezenet0_relu14_fwd squeezenet0_relu15_fwd squeezenet0_relu16_fwd squeezenet0_relu17_fwd squeezenet0_relu18_fwd squeezenet0_relu19_fwd squeezenet0_relu1_fwd squeezenet0_relu20_fwd squeezenet0_relu21_fwd squeezenet0_relu22_fwd squeezenet0_relu23_fwd squeezenet0_relu24_fwd squeezenet0_relu25_fwd squeezenet0_relu2_fwd squeezenet0_relu3_fwd squeezenet0_relu4_fwd squeezenet0_relu5_fwd squeezenet0_relu6_fwd squeezenet0_relu7_fwd 
squeezenet0_relu8_fwd squeezenet0_relu9_fwd ," +"squeezenet0_flatten0_reshape0 ," +" 65 ," +"Convolution squeezenet0_conv0_fwd 1 1 data squeezenet0_conv0_fwd 1 3 64 3 3 2 2 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu0_fwd 1 1 squeezenet0_conv0_fwd squeezenet0_relu0_fwd ," +"Pooling squeezenet0_pool0_fwd 1 1 squeezenet0_relu0_fwd squeezenet0_pool0_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"Convolution squeezenet0_conv1_fwd 1 1 squeezenet0_pool0_fwd squeezenet0_conv1_fwd 1 64 16 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu1_fwd 1 1 squeezenet0_conv1_fwd squeezenet0_relu1_fwd ," +"Convolution squeezenet0_conv2_fwd 1 1 squeezenet0_relu1_fwd squeezenet0_conv2_fwd 1 16 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu2_fwd 1 1 squeezenet0_conv2_fwd squeezenet0_relu2_fwd ," +"Convolution squeezenet0_conv3_fwd 1 1 squeezenet0_relu1_fwd squeezenet0_conv3_fwd 1 16 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu3_fwd 1 1 squeezenet0_conv3_fwd squeezenet0_relu3_fwd ," +"Concat squeezenet0_concat0 2 1 squeezenet0_relu2_fwd squeezenet0_relu3_fwd squeezenet0_concat0 1 ," +"Convolution squeezenet0_conv4_fwd 1 1 squeezenet0_concat0 squeezenet0_conv4_fwd 1 128 16 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu4_fwd 1 1 squeezenet0_conv4_fwd squeezenet0_relu4_fwd ," +"Convolution squeezenet0_conv5_fwd 1 1 squeezenet0_relu4_fwd squeezenet0_conv5_fwd 1 16 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu5_fwd 1 1 squeezenet0_conv5_fwd squeezenet0_relu5_fwd ," +"Convolution squeezenet0_conv6_fwd 1 1 squeezenet0_relu4_fwd squeezenet0_conv6_fwd 1 16 64 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu6_fwd 1 1 squeezenet0_conv6_fwd squeezenet0_relu6_fwd ," +"Concat squeezenet0_concat1 2 1 squeezenet0_relu5_fwd squeezenet0_relu6_fwd squeezenet0_concat1 1 ," +"Pooling squeezenet0_pool1_fwd 1 1 squeezenet0_concat1 squeezenet0_pool1_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"Convolution squeezenet0_conv7_fwd 1 1 squeezenet0_pool1_fwd squeezenet0_conv7_fwd 1 128 32 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU 
squeezenet0_relu7_fwd 1 1 squeezenet0_conv7_fwd squeezenet0_relu7_fwd ," +"Convolution squeezenet0_conv8_fwd 1 1 squeezenet0_relu7_fwd squeezenet0_conv8_fwd 1 32 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu8_fwd 1 1 squeezenet0_conv8_fwd squeezenet0_relu8_fwd ," +"Convolution squeezenet0_conv9_fwd 1 1 squeezenet0_relu7_fwd squeezenet0_conv9_fwd 1 32 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu9_fwd 1 1 squeezenet0_conv9_fwd squeezenet0_relu9_fwd ," +"Concat squeezenet0_concat2 2 1 squeezenet0_relu8_fwd squeezenet0_relu9_fwd squeezenet0_concat2 1 ," +"Convolution squeezenet0_conv10_fwd 1 1 squeezenet0_concat2 squeezenet0_conv10_fwd 1 256 32 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu10_fwd 1 1 squeezenet0_conv10_fwd squeezenet0_relu10_fwd ," +"Convolution squeezenet0_conv11_fwd 1 1 squeezenet0_relu10_fwd squeezenet0_conv11_fwd 1 32 128 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu11_fwd 1 1 squeezenet0_conv11_fwd squeezenet0_relu11_fwd ," +"Convolution squeezenet0_conv12_fwd 1 1 squeezenet0_relu10_fwd squeezenet0_conv12_fwd 1 32 128 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu12_fwd 1 1 squeezenet0_conv12_fwd squeezenet0_relu12_fwd ," +"Concat squeezenet0_concat3 2 1 squeezenet0_relu11_fwd squeezenet0_relu12_fwd squeezenet0_concat3 1 ," +"Pooling squeezenet0_pool2_fwd 1 1 squeezenet0_concat3 squeezenet0_pool2_fwd 0 3 3 2 2 0 0 -1 -1 -1 0 ," +"Convolution squeezenet0_conv13_fwd 1 1 squeezenet0_pool2_fwd squeezenet0_conv13_fwd 1 256 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu13_fwd 1 1 squeezenet0_conv13_fwd squeezenet0_relu13_fwd ," +"Convolution squeezenet0_conv14_fwd 1 1 squeezenet0_relu13_fwd squeezenet0_conv14_fwd 1 48 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu14_fwd 1 1 squeezenet0_conv14_fwd squeezenet0_relu14_fwd ," +"Convolution squeezenet0_conv15_fwd 1 1 squeezenet0_relu13_fwd squeezenet0_conv15_fwd 1 48 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu15_fwd 1 1 squeezenet0_conv15_fwd squeezenet0_relu15_fwd ," 
+"Concat squeezenet0_concat4 2 1 squeezenet0_relu14_fwd squeezenet0_relu15_fwd squeezenet0_concat4 1 ," +"Convolution squeezenet0_conv16_fwd 1 1 squeezenet0_concat4 squeezenet0_conv16_fwd 1 384 48 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu16_fwd 1 1 squeezenet0_conv16_fwd squeezenet0_relu16_fwd ," +"Convolution squeezenet0_conv17_fwd 1 1 squeezenet0_relu16_fwd squeezenet0_conv17_fwd 1 48 192 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu17_fwd 1 1 squeezenet0_conv17_fwd squeezenet0_relu17_fwd ," +"Convolution squeezenet0_conv18_fwd 1 1 squeezenet0_relu16_fwd squeezenet0_conv18_fwd 1 48 192 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu18_fwd 1 1 squeezenet0_conv18_fwd squeezenet0_relu18_fwd ," +"Concat squeezenet0_concat5 2 1 squeezenet0_relu17_fwd squeezenet0_relu18_fwd squeezenet0_concat5 1 ," +"Convolution squeezenet0_conv19_fwd 1 1 squeezenet0_concat5 squeezenet0_conv19_fwd 1 384 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu19_fwd 1 1 squeezenet0_conv19_fwd squeezenet0_relu19_fwd ," +"Convolution squeezenet0_conv20_fwd 1 1 squeezenet0_relu19_fwd squeezenet0_conv20_fwd 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu20_fwd 1 1 squeezenet0_conv20_fwd squeezenet0_relu20_fwd ," +"Convolution squeezenet0_conv21_fwd 1 1 squeezenet0_relu19_fwd squeezenet0_conv21_fwd 1 64 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu21_fwd 1 1 squeezenet0_conv21_fwd squeezenet0_relu21_fwd ," +"Concat squeezenet0_concat6 2 1 squeezenet0_relu20_fwd squeezenet0_relu21_fwd squeezenet0_concat6 1 ," +"Convolution squeezenet0_conv22_fwd 1 1 squeezenet0_concat6 squeezenet0_conv22_fwd 1 512 64 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu22_fwd 1 1 squeezenet0_conv22_fwd squeezenet0_relu22_fwd ," +"Convolution squeezenet0_conv23_fwd 1 1 squeezenet0_relu22_fwd squeezenet0_conv23_fwd 1 64 256 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu23_fwd 1 1 squeezenet0_conv23_fwd squeezenet0_relu23_fwd ," +"Convolution squeezenet0_conv24_fwd 1 1 squeezenet0_relu22_fwd 
squeezenet0_conv24_fwd 1 64 256 3 3 1 1 1 1 1 -1 1 1 ," +"ReLU squeezenet0_relu24_fwd 1 1 squeezenet0_conv24_fwd squeezenet0_relu24_fwd ," +"Concat squeezenet0_concat7 2 1 squeezenet0_relu23_fwd squeezenet0_relu24_fwd squeezenet0_concat7 1 ," +"Convolution squeezenet0_conv25_fwd 1 1 squeezenet0_concat7 squeezenet0_conv25_fwd 1 512 1000 1 1 1 1 0 0 1 -1 1 1 ," +"ReLU squeezenet0_relu25_fwd 1 1 squeezenet0_conv25_fwd squeezenet0_relu25_fwd ," +"Pooling squeezenet0_pool3_fwd 1 1 squeezenet0_relu25_fwd squeezenet0_pool3_fwd 1 13 13 13 13 0 0 -1 -1 -1 0 ," +"Reshape squeezenet0_flatten0_reshape0 1 1 squeezenet0_pool3_fwd squeezenet0_flatten0_reshape0 0 4 4 0 0 -1 1 ," diff --git a/3rdparty/TNN/benchmark/benchmark_android/.gitignore b/3rdparty/TNN/benchmark/benchmark_android/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/.gitignore @@ -0,0 +1 @@ +*.txt diff --git a/3rdparty/TNN/benchmark/benchmark_android/README.md b/3rdparty/TNN/benchmark/benchmark_android/README.md new file mode 100644 index 0000000..ce8879a --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/README.md @@ -0,0 +1,7 @@ +models benchmark: +push all benchmark models to android device dir /data/local/tmp/benchmark-model, then run benchmark_models.sh, you will get all model benchmark cost time info. + +layer benchmark: +run benchmark_layer.sh -h, you can get help info. below is some import info: +run benchmark_layer.sh --gtest_list_tests, you can get all layer benchmark list with parameters info, use --gtest_filter to filter layer benchmark. for example, run benchmark_layer.sh --gtest_filter=LayerTest/AddLayer*, you can benchmark add layer only;run benchmark_layer.sh --gtest_filter=LayerTest/AddLayerTest.AddLayer/0, you can benchmark add layer with one special parameter only. 
+ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark-release.apk b/3rdparty/TNN/benchmark/benchmark_android/benchmark-release.apk new file mode 100644 index 0000000..5f72527 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark-release.apk differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/.gitignore b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/.gitignore new file mode 100644 index 0000000..4978360 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/.gitignore @@ -0,0 +1,2 @@ +/build +.idea diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/build.gradle b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/build.gradle new file mode 100644 index 0000000..2cf9c39 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/build.gradle @@ -0,0 +1,26 @@ +apply plugin: 'com.android.application' + +android { + compileSdkVersion 26 + buildToolsVersion "26.0.2" + + defaultConfig { + minSdkVersion 15 + targetSdkVersion 26 + versionCode 1000 + versionName "1.0.0" + } + buildTypes { + release { + minifyEnabled true + proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + } + } +} + +dependencies { + implementation fileTree(include: ['*.jar'], dir: 'libs') + implementation 'com.android.support:appcompat-v7:26.0.2' +} + + diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.jar b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..f6b961f Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.jar differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.properties 
b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..8126a14 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Tue Dec 15 11:14:35 CST 2020 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew new file mode 100755 index 0000000..cccdd3d --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 
+ esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew.bat b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew.bat new file mode 100644 index 0000000..f955316 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/proguard-rules.pro b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/proguard-rules.pro new file mode 100644 index 0000000..f61a577 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/proguard-rules.pro @@ -0,0 +1,17 @@ +# Add project specific ProGuard rules here. +# By default, the flags in this file are appended to flags specified +# in C:\Users\neiltian\AppData\Local\Android\Sdk/tools/proguard/proguard-android.txt +# You can edit the include path and order by changing the proguardFiles +# directive in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# Add any project specific keep options here: + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/AndroidManifest.xml b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/AndroidManifest.xml new file mode 100644 index 0000000..be81cec --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/AndroidManifest.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/BenchmarkModel.java b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/BenchmarkModel.java new file mode 100644 index 0000000..74af129 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/BenchmarkModel.java @@ -0,0 +1,5 @@ +package com.tencent.tnn.benchmark; + +public class BenchmarkModel { + public native int nativeRun(String args, String fileDir); +} diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/FileUtils.java b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/FileUtils.java new file mode 100644 index 0000000..043c6b3 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/FileUtils.java @@ -0,0 +1,52 @@ +package com.tencent.tnn.benchmark; + +import android.content.res.AssetManager; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; 
+ +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.util.ArrayList; + + +public class FileUtils { + + private FileUtils() { + throw new AssertionError(); + } + + public static boolean copyFile(String input_path, String out_path) { + InputStream in = null; + OutputStream out = null; + try { + in = new FileInputStream(input_path); + out = new FileOutputStream(out_path); + copyFile(in, out); + in.close(); + in = null; + out.flush(); + out.close(); + out = null; + return true; + } catch(Exception e) { + e.printStackTrace(); + return false; + } + } + + public static void copyFile(InputStream in, OutputStream out) throws IOException { + byte[] buffer = new byte[1024]; + int read; + while((read = in.read(buffer)) != -1){ + out.write(buffer, 0, read); + } + } +} + + diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/MainActivity.java b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/MainActivity.java new file mode 100644 index 0000000..f7796e3 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/java/com/tencent/tnn/benchmark/MainActivity.java @@ -0,0 +1,70 @@ +package com.tencent.tnn.benchmark; + + +import android.app.Activity; +import android.content.Intent; +import android.os.Bundle; +import android.util.Log; +import android.widget.TextView; + +import java.io.File; + +public class MainActivity extends Activity { + + private TextView lightLiveCheckBtn; + + private static final String TAG = "TNN_BenchmarkModelActivity"; + private BenchmarkModel benchmark = new BenchmarkModel(); + private static final String ARGS_INTENT_KEY_ARGS_0 = "args"; + private static final String ARGS_INTENT_KEY_ARGS_1 = "--args"; + 
private static final String ARGS_INTENT_KEY_BENCHMARK_DIR = "benchmark-dir"; + private static final String ARGS_INTENT_KEY_LOAD_LIST = "load-list"; + private static final String ARGS_INTENT_KEY_MODEL = "model"; + + @Override + protected void onCreate(Bundle savedInstanceState) { + + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + + init(); + + } + + private void init() { + String model = ""; + try { + Intent intent = getIntent(); + Bundle bundle = intent.getExtras(); + String benchmark_dir = bundle.getString(ARGS_INTENT_KEY_BENCHMARK_DIR, "/data/local/tmp/tnn-benchmark/"); + String[] load_list = bundle.getStringArray(ARGS_INTENT_KEY_LOAD_LIST); + model = bundle.getString(ARGS_INTENT_KEY_MODEL); + for(String element : load_list) { + FileUtils.copyFile(benchmark_dir + "/" + element, getFilesDir().getAbsolutePath() + "/" + element); + System.load(getFilesDir().getAbsolutePath() + "/" + element); + } + final String args = bundle.getString(ARGS_INTENT_KEY_ARGS_0, bundle.getString(ARGS_INTENT_KEY_ARGS_1)); + final String file_dir = this.getFilesDir().getAbsolutePath(); + String output_path = file_dir + "/" + model; + File model_file = new File(output_path); + if(model_file.exists()) { + model_file.delete(); + } + model_file.createNewFile(); + + FileUtils.copyFile(benchmark_dir + "/" + "benchmark-model/" + model, output_path); + int result = benchmark.nativeRun(args, file_dir); + if(result != 0) { + Log.i("tnn", String.format(" %s TNN Benchmark time cost failed error code: %d \n", model , result)); + } + } catch(Error | Exception e) { + Log.i("tnn", String.format(" %s TNN Benchmark time cost failed error/exception: %s \n", model, e.getMessage())); + } + } + + @Override + protected void onResume() { + super.onResume(); + } + +} diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.cc 
b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.cc new file mode 100644 index 0000000..7bc6c82 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.cc @@ -0,0 +1,61 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#include + +#include +#include +#include + +#include "benchmark_model_jni.h" +#include "test.h" + +#ifdef __ANDROID__ +#include +#endif + +JNIEXPORT jint JNICALL TNN_BENCHMARK_MODEL(nativeRun)(JNIEnv* env, jobject thiz, jstring args_obj, jstring file_dir) { + const char* args_chars = env->GetStringUTFChars(args_obj, nullptr); + const char* file_chars = env->GetStringUTFChars(file_dir, nullptr); + + // Split the args string into individual arg tokens. + std::istringstream iss(args_chars); + std::vector args_split{std::istream_iterator(iss), + {}}; + + // Construct a fake argv command-line object for the benchmark. 
+ std::vector argv; + std::string arg0 = "(BenchmarkModelAndroid)"; + std::string model_file; + bool model_path_option = false; + argv.push_back(const_cast(arg0.data())); + for (auto& arg : args_split) { + // Deal with the model path + if (!model_path_option) { + argv.push_back(const_cast(arg.data())); + } else { + model_file = arg; + std::ifstream fin(arg); + if (!fin) { + model_file = std::string(file_chars) + "/" + arg; + } + argv.push_back(const_cast(model_file.data())); + } + model_path_option = (arg.find("-mp") != std::string::npos); + } + + int result = TNN_NS::test::Run(static_cast(argv.size()), argv.data()); + env->ReleaseStringUTFChars(args_obj, args_chars); + return result; +} diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.h b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.h new file mode 100644 index 0000000..fb3ca42 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/jni/cc/benchmark_model_jni.h @@ -0,0 +1,30 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#ifndef ANDROID_BENCHMARK_MODEL_JNI_H_ +#define ANDROID_BENCHMARK_MODEL_JNI_H_ + +#include + +#define TNN_BENCHMARK_MODEL(sig) Java_com_tencent_tnn_benchmark_BenchmarkModel_##sig +#ifdef __cplusplus +extern "C"{ +#endif + +JNIEXPORT jint JNICALL TNN_BENCHMARK_MODEL(nativeRun)(JNIEnv *env, jobject thiz, jstring args_obj, jstring file_dir); + +#ifdef __cplusplus +} +#endif +#endif //ANDROID_BENCHMARK_MODEL_JNI_H_ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/drawable/tnn.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/drawable/tnn.png new file mode 100644 index 0000000..b07ecc7 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/drawable/tnn.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/ic_launcher.png new file mode 100644 index 0000000..3d82028 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/layout/activity_main.xml b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/layout/activity_main.xml new file mode 100644 index 0000000..2fe84b4 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/layout/activity_main.xml @@ -0,0 +1,20 @@ + + + + + + + + diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-hdpi/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-hdpi/ic_launcher.png new file mode 100644 index 0000000..96ff172 Binary files /dev/null and 
b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-hdpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-ldpi/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-ldpi/ic_launcher.png new file mode 100644 index 0000000..45cc506 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-ldpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-mdpi/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 0000000..680c5f8 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xhdpi/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xhdpi/ic_launcher.png new file mode 100644 index 0000000..40372f2 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xhdpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxhdpi/ic_launcher.png b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxhdpi/ic_launcher.png new file mode 100644 index 0000000..54219a3 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxhdpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxxhdpi/ic_launcher.png 
b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxxhdpi/ic_launcher.png new file mode 100644 index 0000000..8b610c5 Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/benchmark/src/main/res/mipmap-xxxhdpi/ic_launcher.png differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/build.gradle b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/build.gradle new file mode 100644 index 0000000..18ae165 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/build.gradle @@ -0,0 +1,27 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. + +buildscript { + + repositories { + google() + jcenter() + } + dependencies { + classpath 'com.android.tools.build:gradle:3.5.2' + + + // NOTE: Do not place your application dependencies here; they belong + // in the individual module build.gradle files + } +} + +allprojects { + repositories { + google() + jcenter() + } +} + +task clean(type: Delete) { + delete rootProject.buildDir +} \ No newline at end of file diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle.properties b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle.properties new file mode 100644 index 0000000..45a138d --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle.properties @@ -0,0 +1,20 @@ +# Project-wide Gradle settings. + +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. + +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html + +# Specifies the JVM arguments used for the daemon process. +# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx1536m + +# When configured, Gradle will run in incubating parallel mode. 
+# This option should only be used with decoupled projects. More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true + + +android.injected.testOnly=false diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.jar b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..13372ae Binary files /dev/null and b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.jar differ diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.properties b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..2cc90eb --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,6 @@ +#Fri Apr 24 15:10:50 CST 2020 +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.3-all.zip diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew new file mode 100644 index 0000000..9d82f78 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew @@ -0,0 +1,160 @@ +#!/usr/bin/env bash + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Use the maximum available, or set MAX_FD != -1 to use that value. 
+MAX_FD="maximum" + +warn ( ) { + echo "$*" +} + +die ( ) { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; +esac + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? 
-ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 
+ esac +fi + +# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules +function splitJvmOpts() { + JVM_OPTS=("$@") +} +eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS +JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" + +exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew.bat b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew.bat new file mode 100644 index 0000000..8a0b282 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/gradlew.bat @@ -0,0 +1,90 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. 
+echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windowz variants + +if not "%OS%" == "Windows_NT" goto win9xME_args +if "%@eval[2+2]" == "4" goto 4NT_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* +goto execute + +:4NT_args +@rem Get arguments from the 4NT Shell from JP Software +set CMD_LINE_ARGS=%$ + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/settings.gradle b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/settings.gradle new file mode 100644 index 0000000..81101f3 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_app/settings.gradle @@ -0,0 +1 @@ +include ':benchmark' diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_layer.sh b/3rdparty/TNN/benchmark/benchmark_android/benchmark_layer.sh new file mode 100755 index 0000000..1dfe3f2 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_layer.sh @@ -0,0 +1,129 @@ +#!/bin/bash + +ABI="arm64-v8a" +CLEAN="" +WORK_DIR=`pwd` +FILTER="" +DEVICE_TYPE="" +KERNEL_TUNE="-et" +BUILD_DIR=build +ANDROID_DIR=/data/local/tmp/tnn-benchmark +OUTPUT_LOG_FILE=benchmark_layer_result.txt +LOOP_COUNT=10 +ADB=adb + +function usage() { + echo "usage: ./benchmark_layer.sh [-32] [-c] [-f] [-d] [-t] " + echo "options:" + echo " -32 Build 32 bit." + echo " -c Clean up build folders." + echo " -d run with specified device" + echo " -f specified layer" + echo " -t CPU/GPU specify the platform to run" + echo " -et/-noet set kernel enable tune on or off" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function clean_build() { + echo $1 | grep "$BUILD_DIR\b" > /dev/null + if [[ "$?" != "0" ]]; then + exit_with_msg "Warnning: $1 seems not to be a BUILD folder." + fi + rm -rf $1 + mkdir $1 +} + +function build_android_bench() { + if [ "-c" == "$CLEAN" ]; then + clean_build $BUILD_DIR + fi + mkdir -p build + cd $BUILD_DIR + cmake ../../.. 
\ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DANDROID_ABI="${ABI}" \ + -DANDROID_STL=c++_static \ + -DANDROID_NATIVE_API_LEVEL=android-14 \ + -DANDROID_TOOLCHAIN=clang \ + -DTNN_ARM_ENABLE:BOOL=ON \ + -DTNN_OPENCL_ENABLE:BOOL=ON \ + -DTNN_TEST_ENABLE:BOOL=ON \ + -DTNN_BENCHMARK_MODE:BOOL=ON \ + -DTNN_UNIT_TEST_ENABLE:BOOL=ON \ + -DTNN_UNIT_TEST_BENCHMARK:BOOL=ON \ + -DTNN_PROFILER_ENABLE:BOOL=ON \ + -DBUILD_FOR_ANDROID_COMMAND=true + make -j4 +} + +function bench_android() { + build_android_bench + + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + $ADB shell "mkdir -p $ANDROID_DIR" + find . -name "*.so" | while read solib; do + $ADB push $solib $ANDROID_DIR + done + $ADB push test/unit_test/unit_test $ANDROID_DIR/unit_test + $ADB shell chmod 0777 $ANDROID_DIR/unit_test + + $ADB shell "getprop ro.product.model > ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then + DEVICE_TYPE="" + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + $ADB shell "echo '\nbenchmark device: ARM \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + $ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. ./unit_test ${KERNEL_TUNE} -ic ${LOOP_COUNT} -dt ARM --gtest_filter="*${FILTER}*" -ub >> $OUTPUT_LOG_FILE" + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ];then + LOOP_COUNT=1 + $ADB shell "echo '\nbenchmark device: OPENCL \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + $ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. 
./unit_test ${KERNEL_TUNE} -ic ${LOOP_COUNT} -dt OPENCL --gtest_filter="*${FILTER}*" -ub >> $OUTPUT_LOG_FILE" + fi + + $ADB pull $ANDROID_DIR/$OUTPUT_LOG_FILE ../$OUTPUT_LOG_FILE + cat ${WORK_DIR}/$OUTPUT_LOG_FILE +} + +while [ "$1" != "" ]; do + case $1 in + -32) + shift + ABI="armeabi-v7a with NEON" + ;; + -c) + shift + CLEAN="-c" + ;; + -f) + shift + FILTER=$1 + shift + ;; + -d) + shift + ADB="adb -s $1" + shift + ;; + -t) + shift + DEVICE_TYPE="$1" + shift + ;; + *) + usage + exit 1 + esac +done + +bench_android diff --git a/3rdparty/TNN/benchmark/benchmark_android/benchmark_models.sh b/3rdparty/TNN/benchmark/benchmark_android/benchmark_models.sh new file mode 100755 index 0000000..15ce89a --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_android/benchmark_models.sh @@ -0,0 +1,366 @@ +#!/bin/bash + +export PATH=$PATH:$ANDROID_HOME/platform-tools + +ABI="arm64-v8a" +STL="c++_static" +PROFILING="OFF" +CLEAN="" +DEVICE_TYPE="" +MODEL_TYPE=TNN +USE_NCNN_MODEL=0 +KERNEL_TUNE="-et" +THREAD_NUM=1 +ADB=adb +BENCHMARK_TYPE="APP" +BENCHMARK_APP_DIR=benchmark_app/benchmark/ + +WORK_DIR=`pwd` +BENCHMARK_MODEL_DIR=$WORK_DIR/../benchmark-model +BUILD_DIR=build +BUILD_APP_DIR=build_app +ANDROID_DIR=/data/local/tmp/tnn-benchmark +ANDROID_DATA_DIR=$ANDROID_DIR/benchmark-model +OUTPUT_LOG_FILE=benchmark_models_result.txt +LOOP_COUNT=16 +WARM_UP_COUNT=5 +INTERVAL=5 + +benchmark_model_list=( +#test.tnnproto \ +) + +function usage() { + echo "usage: ./benchmark_models.sh [-32] [-c] [-b] [-f] [-d] [-t] " + echo "options:" + echo " -32 Build 32 bit." + echo " -c Clean up build folders." 
+ echo " -b build targets only" + echo " -f build profiling targets " + echo " -d run with specified device" + echo " -t CPU/GPU/HUAWEI_NPU specify the platform to run" + echo " -th num of threads to run, default: 1" + echo " -n use ncnn model" + echo " -bs benchmark shell" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function clean_build() { + echo $1 | grep "$BUILD_DIR\b" > /dev/null + if [[ "$?" != "0" ]]; then + exit_with_msg "Warnning: $1 seems not to be a BUILD folder." + fi + rm -rf $1 + mkdir $1 +} + +function build_android_bench() { + if [ "-c" == "$CLEAN" ]; then + clean_build $BUILD_DIR + fi + if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ]; then + echo "NPU Enable" + # set c++ shared + STL="c++_shared" + HUAWEI_NPU_ENABLE="ON" + #start to cp + if [ ! -d ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/ ]; then + mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/ + fi + mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a + mkdir -p ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a + cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/armeabi-v7a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/armeabi-v7a/ + cp $ANDROID_NDK/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_shared.so ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/arm64-v8a/ + fi + mkdir -p $BUILD_DIR + cd $BUILD_DIR + cmake ../../.. 
\ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DANDROID_ABI="${ABI}" \ + -DANDROID_STL=${STL}\ + -DANDROID_NATIVE_API_LEVEL=android-14 \ + -DANDROID_TOOLCHAIN=clang \ + -DTNN_ARM_ENABLE:BOOL=ON \ + -DTNN_OPENCL_ENABLE:BOOL=ON \ + -DTNN_HUAWEI_NPU_ENABLE:BOOL=${HUAWEI_NPU_ENABLE} \ + -DTNN_OPENMP_ENABLE:BOOL=ON \ + -DTNN_TEST_ENABLE:BOOL=ON \ + -DTNN_BUILD_BENCHMARK_TEST_LIB_ENABLE:BOOL=ON \ + -DTNN_BENCHMARK_MODE:BOOL=ON \ + -DTNN_PROFILER_ENABLE:BOOL=${PROFILING} \ + -DTNN_BUILD_SHARED:BOOL=ON \ + -DBUILD_FOR_ANDROID_COMMAND=true + make -j4 +} + +function bench_android_shell() { + build_android_bench + + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + if [ "" != "$BUILD_ONLY" ]; then + echo "build done!" + exit 0 + fi + + $ADB shell "mkdir -p $ANDROID_DIR" + find . -name "*.so" | while read solib; do + $ADB push $solib $ANDROID_DIR + done + $ADB push test/TNNTest $ANDROID_DIR/TNNTest + $ADB shell chmod 0777 $ANDROID_DIR/TNNTest + + $ADB shell "mkdir -p $ANDROID_DIR/benchmark-model" + $ADB push ${BENCHMARK_MODEL_DIR} $ANDROID_DIR + + cd ${BENCHMARK_MODEL_DIR} + $ADB shell "getprop ro.product.model > ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ] && [ "$DEVICE_TYPE" != "HUAWEI_NPU" ]; then + DEVICE_TYPE="" + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + device=ARM + $ADB shell "echo '\nbenchmark device: ${device} \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + + for benchmark_model in ${benchmark_model_list[*]} + do + $ADB logcat -c + $ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. 
./TNNTest -th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${ANDROID_DATA_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE" + sleep $INTERVAL + $ADB shell "cd ${ANDROID_DIR}; logcat -d | grep \"TNN Benchmark time cost\" | grep ${device} | grep -w ${benchmark_model} | tail -n 1 >> $OUTPUT_LOG_FILE" + done + fi + + if [ "ON" == $PROFILING ]; then + WARM_UP_COUNT=5 + LOOP_COUNT=5 + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ];then + device=OPENCL + $ADB shell "echo '\nbenchmark device: ${device} \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + for benchmark_model in ${benchmark_model_list[*]} + do + $ADB logcat -c + $ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. ./TNNTest -th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${ANDROID_DATA_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE" + sleep $INTERVAL + $ADB shell "cd ${ANDROID_DIR}; logcat -d | grep \"TNN Benchmark time cost\" | grep ${device} | grep -w ${benchmark_model} | tail -n 1 >> $OUTPUT_LOG_FILE" + done + fi + + if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ];then + echo "Run Huawei Npu" + device=HUAWEI_NPU + $ADB push ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/${ABI}/* $ANDROID_DIR/ + $ADB push ${WORK_DIR}/../../third_party/huawei_npu/hiai_ddk_latest/${ABI}/* $ANDROID_DIR/ + $ADB shell "echo '\nbenchmark device: ${device} \n' >> ${ANDROID_DIR}/$OUTPUT_LOG_FILE" + for benchmark_model in ${benchmark_model_list[*]} + do + $ADB logcat -c + $ADB shell "cd ${ANDROID_DIR}; LD_LIBRARY_PATH=. 
./TNNTest -th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -nt ${device} -mt ${MODEL_TYPE} -mp ${ANDROID_DATA_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE" + sleep $INTERVAL + $ADB shell "cd ${ANDROID_DIR}; logcat -d | grep \"TNN Benchmark time cost\" | grep ${device} | grep -w ${benchmark_model} | tail -n 1 >> $OUTPUT_LOG_FILE" + done + fi + + $ADB shell "echo '' >> $ANDROID_DIR/$OUTPUT_LOG_FILE" + $ADB shell "date >> $ANDROID_DIR/$OUTPUT_LOG_FILE" + + $ADB pull $ANDROID_DIR/$OUTPUT_LOG_FILE ${WORK_DIR}/$OUTPUT_LOG_FILE + cat ${WORK_DIR}/$OUTPUT_LOG_FILE + +} + +function build_android_bench_app() { + mkdir -p $BUILD_APP_DIR + cd $BUILD_APP_DIR + + cmake ../../benchmark_app/benchmark/ \ + -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DANDROID_ABI="${ABI}" \ + -DANDROID_STL=${STL}\ + -DANDROID_NATIVE_API_LEVEL=android-14 \ + -DANDROID_TOOLCHAIN=clang + make -j4 + cd ../.. +} + +function bench_android_app() { + build_android_bench + build_android_bench_app + + if [ "$ABI" = "armeabi-v7a with NEON" ];then + adb install -r --abi armeabi-v7a benchmark-release.apk + else + adb install -r --abi $ABI benchmark-release.apk + fi + + $ADB shell "mkdir -p $ANDROID_DIR/benchmark-model" + $ADB push ${BENCHMARK_MODEL_DIR} $ANDROID_DIR + + $ADB shell "getprop ro.product.model" > $OUTPUT_LOG_FILE + + cd ${BUILD_DIR} + $ADB shell "mkdir -p $ANDROID_DIR" + find . 
-name "*.so" | while read solib; do + $ADB push $solib $ANDROID_DIR + done + + cd ${BENCHMARK_MODEL_DIR} + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ] && [ "$DEVICE_TYPE" != "HUAWEI_NPU" ]; then + DEVICE_TYPE="" + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ]; then + device=ARM + echo -e "\nbenchmark device: ${device}\n" + for benchmark_model in ${benchmark_model_list[*]} + do + TEST_ARGS="-th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${benchmark_model}" + $ADB logcat -c + $ADB shell am start -S -W \ + -n com.tencent.tnn.benchmark/.MainActivity \ + --es args \'${TEST_ARGS}\' --es benchmark-dir ${ANDROID_DIR} \ + --es model ${benchmark_model} \ + --esa load-list "libTNN.so,libTNNBenchmarkTest.so,libtnn_wrapper.so" + result="" + while [[ $result == "" ]] + do + sleep 1 + result=$($ADB logcat -d | grep "TNN Benchmark time cost" | grep -w ${benchmark_model} | tail -n 1) + done + echo $result + echo $result | grep -v "failed" >> $WORK_DIR/$OUTPUT_LOG_FILE + sleep $INTERVAL + done + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ]; then + device=OPENCL + echo -e "\nbenchmark device: ${device}\n" + for benchmark_model in ${benchmark_model_list[*]} + do + TEST_ARGS="-th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${benchmark_model}" + $ADB logcat -c + $ADB shell am start -S -W \ + -n com.tencent.tnn.benchmark/.MainActivity \ + --es args \'${TEST_ARGS}\' --es benchmark-dir ${ANDROID_DIR} \ + --es model ${benchmark_model} \ + --esa load-list "libTNN.so,libTNNBenchmarkTest.so,libtnn_wrapper.so" + result="" + while [[ $result == "" ]] + do + sleep 1 + result=$($ADB logcat -d | grep "TNN Benchmark time cost" | grep -w ${benchmark_model} | tail -n 1) + done + echo $result + echo $result | grep -v "failed" >> 
$WORK_DIR/$OUTPUT_LOG_FILE + sleep $INTERVAL + done + fi + + if [ "$DEVICE_TYPE" = "HUAWEI_NPU" ];then + device=HUAWEI_NPU + echo -e "\nbenchmark device: ${device}\n" + $ADB push ${WORK_DIR}/../../third_party/huawei_npu/cpp_lib/${ABI}/* $ANDROID_DIR/ + $ADB push ${WORK_DIR}/../../third_party/huawei_npu/hiai_ddk_latest/${ABI}/* $ANDROID_DIR/ + for benchmark_model in ${benchmark_model_list[*]} + do + TEST_ARGS="-th ${THREAD_NUM} ${KERNEL_TUNE} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -nt ${device} -mt ${MODEL_TYPE} -mp ${benchmark_model}" + $ADB logcat -c + $ADB shell am start -S -W \ + -n com.tencent.tnn.benchmark/.MainActivity \ + --es args \'${TEST_ARGS}\' --es benchmark-dir ${ANDROID_DIR} \ + --es model ${benchmark_model} \ + --esa load-list "libc++_shared.so,libhiai_ir.so,libcpucl.so,libhcl.so,libhiai.so,libhiai_ir_build.so,libTNN.so,libTNNBenchmarkTest.so,libtnn_wrapper.so" + result="" + while [[ $result == "" ]] + do + sleep 1 + result=$($ADB logcat -d | grep "TNN Benchmark time cost" | grep -w ${benchmark_model} | tail -n 1) + done + echo $result + echo $result | grep -v "failed" >> $WORK_DIR/$OUTPUT_LOG_FILE + sleep $INTERVAL + done + fi + + $ADB uninstall com.tencent.tnn.benchmark + + $ADB shell "echo ''" >> $WORK_DIR/$OUTPUT_LOG_FILE + $ADB shell "date" >> $WORK_DIR/$OUTPUT_LOG_FILE + + cat ${WORK_DIR}/$OUTPUT_LOG_FILE + +} + +while [ "$1" != "" ]; do + case $1 in + -32) + shift + ABI="armeabi-v7a with NEON" + ;; + -c) + shift + CLEAN="-c" + ;; + -b) + shift + BUILD_ONLY="-b" + ;; + -f) + shift + PROFILING="ON" + ;; + -d) + shift + ADB="adb -s $1" + shift + ;; + -t) + shift + DEVICE_TYPE="$1" + shift + ;; + -n) + shift + MODEL_TYPE=NCNN + ;; + -th) + shift + THREAD_NUM=$1 + shift + ;; + -bs) + shift + BENCHMARK_TYPE="SHELL" + ;; + *) + usage + exit 1 + esac +done + +if [[ "$BENCHMARK_TYPE" == "APP" && "$PROFILING" == "OFF" ]]; then + bench_android_app +else + bench_android_shell +fi diff --git 
a/3rdparty/TNN/benchmark/benchmark_armlinux/benchmark_models.sh b/3rdparty/TNN/benchmark/benchmark_armlinux/benchmark_models.sh new file mode 100755 index 0000000..8ca1760 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_armlinux/benchmark_models.sh @@ -0,0 +1,169 @@ +#!/bin/bash + +PROFILING="OFF" +CLEAN="" +DEVICE_TYPE="" +MODEL_TYPE=TNN +USE_NCNN_MODEL=0 +SHARED_LIB="ON" +ARM="ON" +OPENMP="ON" +OPENCL="ON" +CC=aarch64-linux-gnu-gcc +CXX=aarch64-linux-gnu-g++ +TARGET_ARCH=aarch64 + +if [ -z $TNN_ROOT_PATH ] +then + TNN_ROOT_PATH=$(cd `dirname $0`; pwd)/../.. +fi + +WORK_DIR=`pwd` +BENCHMARK_MODEL_DIR=$WORK_DIR/../benchmark-model +BUILD_DIR=build +OUTPUT_LOG_FILE=benchmark_models_result.txt +LOOP_COUNT=16 +WARM_UP_COUNT=8 + +benchmark_model_list=( +#test.tnnproto \ +) + +function usage() { + echo "usage: ./benchmark_models.sh [-32] [-c] [-b] [-f] [-t] " + echo "options:" + echo " -32 Build 32 bit." + echo " -c Clean up build folders." + echo " -b build targets only" + echo " -f build profiling targets " + echo " -t CPU/GPU specify the platform to run" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function clean_build() { + echo $1 | grep "$BUILD_DIR\b" > /dev/null + if [[ "$?" != "0" ]]; then + exit_with_msg "Warnning: $1 seems not to be a BUILD folder." 
+ fi + rm -rf $1 + mkdir $1 +} + +function build_armlinux_bench() { + if [ "-c" == "$CLEAN" ]; then + clean_build $BUILD_DIR + fi + mkdir -p build + cd $BUILD_DIR + cmake ${TNN_ROOT_PATH} \ + -DCMAKE_SYSTEM_NAME=Linux \ + -DTNN_TEST_ENABLE=ON \ + -DTNN_CPU_ENABLE=ON \ + -DCMAKE_C_COMPILER=$CC \ + -DCMAKE_CXX_COMPILER=$CXX \ + -DCMAKE_BUILD_TYPE=Debug \ + -DTNN_ARM_ENABLE:BOOL=$ARM \ + -DTNN_OPENMP_ENABLE:BOOL=$OPENMP \ + -DTNN_OPENCL_ENABLE:BOOL=$OPENCL \ + -DTNN_PROFILER_ENABLE:BOOL=${PROFILING} \ + -DTNN_TEST_ENABLE=ON \ + -DTNN_UNIT_TEST_ENABLE=ON \ + -DTNN_COVERAGE=ON \ + -DCMAKE_SYSTEM_PROCESSOR=$TARGET_ARCH \ + -DTNN_BUILD_SHARED:BOOL=$SHARED_LIB \ + -DTNN_BENCHMARK_MODE=ON + + make -j4 +} + +function bench_armlinux() { + build_armlinux_bench + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + if [ "" != "$BUILD_ONLY" ]; then + echo "build done!" + exit 0 + fi + + cd ${BENCHMARK_MODEL_DIR} + + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then + DEVICE_TYPE="" + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + device=ARM + echo "benchmark device: ${device} " >> $WORK_DIR/$OUTPUT_LOG_FILE + + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=. ./build/test/TNNTest -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE + done + fi + + if [ "ON" == $PROFILING ]; then + WARM_UP_COUNT=5 + LOOP_COUNT=1 + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ];then + device=OPENCL + echo "benchmark device: ${device} " >> $WORK_DIR/$OUTPUT_LOG_FILE + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=. 
./build/test/TNNTest -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE + done + fi + + echo '' >> $OUTPUT_LOG_FILE + date >> $OUTPUT_LOG_FILE + + cat ${WORK_DIR}/$OUTPUT_LOG_FILE +} + +while [ "$1" != "" ]; do + case $1 in + -32) + shift + CC=arm-linux-gnueabihf-gcc + CXX=arm-linux-gnueabihf-g++ + TARGET_ARCH=arm + ;; + -c) + shift + CLEAN="-c" + ;; + -b) + shift + BUILD_ONLY="-b" + ;; + -f) + shift + PROFILING="ON" + ;; + -t) + shift + DEVICE_TYPE="$1" + shift + ;; + -n) + shift + MODEL_TYPE=NCNN + ;; + *) + usage + exit 1 + esac +done + +bench_armlinux diff --git a/3rdparty/TNN/benchmark/benchmark_cuda_linux/benchmark_models.sh b/3rdparty/TNN/benchmark/benchmark_cuda_linux/benchmark_models.sh new file mode 100755 index 0000000..a9e79ce --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_cuda_linux/benchmark_models.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +MODEL_TYPE=TNN +NETWORK_TYPE=TENSORRT +BUILD_ONLY="OFF" +DOWNLOAD_MODEL="OFF" + +if [ -z $TNN_ROOT_PATH ] +then + TNN_ROOT_PATH=$(cd `dirname $0`; pwd)/../.. +fi + +WORK_DIR=`pwd` +BENCHMARK_MODEL_DIR=$WORK_DIR/benchmark_model +OUTPUT_LOG_FILE=benchmark_models_result.txt +LOOP_COUNT=20 +WARM_UP_COUNT=5 + +benchmark_model_list=( +#test.tnnproto \ +) + +#URL, local path +function download_file() { #URL, path + if [ -e $2 ]; then return 0; fi + + name=`basename $2` + echo "downloading $name ..." + # status=`wget $1 -o $2` + status=`curl $1 -s -w %{http_code} -o $2` + if (( status == 200 )); then + return 0 + else + echo "download $name failed" 1>&2 + return -1 + fi +} + +#URL proto, URL model, directory +function download_model() { + directory="./$3" + if [ ! -e ${directory} ]; then + mkdir -p ${directory} + fi + + proto_name=`basename $1` + proto_path_local="${directory}/${proto_name}" + if [ ! -f ${proto_path_local} ]; then + download_file $1 $proto_path_local + succ=$? + if [ ! 
$succ -eq 0 ]; then + echo "please download model manually!!!(url:https://github.com/darrenyao87/tnn-models/tree/master/model)" + rm -r ${directory} + fi + fi + + model_name=`basename $2` + model_path_local="${directory}/${model_name}" + if [ ! -f ${model_path_local} ]; then + download_file $2 $model_path_local + succ=$? + if [ ! $succ -eq 0 ]; then + echo "please download model manually!!!(url:https://github.com/darrenyao87/tnn-models/tree/master/model)" + rm -r ${directory} + fi + fi +} + +function download_bench_model() { + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/resnet50/resnet50.opt.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/resnet50/resnet50.opt.tnnmodel" \ + benchmark_model + + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/bert-based/bert-based.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/bert-based/bert-based.tnnmodel" \ + benchmark_model + + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/bertsquad10/bertsquad10_clean.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/bertsquad10/bertsquad10_clean.tnnmodel" \ + benchmark_model +} + +function usage() { + echo "usage: ./benchmark_models.sh [-b] [-dl] [-mp]" + echo "options:" + echo " -b build only " + echo " -dl download model from github " + echo " -mp model dir path" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function build_cuda_linux_bench() { + cd $TNN_ROOT_PATH/scripts + ./build_cuda_linux.sh + cp $TNN_ROOT_PATH/scripts/cuda_linux_release $TNN_ROOT_PATH/benchmark/benchmark_cuda_linux/ -r +} + +function bench_cuda_linux() { + if [ "OFF" != "$DOWNLOAD_MODEL" ];then + download_bench_model + fi + + build_cuda_linux_bench + if [ $? 
!= 0 ];then + exit_with_msg "build failed" + fi + + if [ "OFF" != "$BUILD_ONLY" ]; then + echo "build done!" + exit 0 + fi + + if [ ! -d ${BENCHMARK_MODEL_DIR} ]; then + echo "please set model dir path or exec script with option -dl" + usage + exit -1 + fi + cd ${BENCHMARK_MODEL_DIR} + + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CUDA" ];then + device=CUDA + echo "benchmark device: ${device} " >> $WORK_DIR/$OUTPUT_LOG_FILE + + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=cuda_linux_release/lib ./cuda_linux_release/bin/TNNTest -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -nt ${NETWORK_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE + done + fi + + echo '' >> $OUTPUT_LOG_FILE + date >> $OUTPUT_LOG_FILE + + cat ${WORK_DIR}/$OUTPUT_LOG_FILE +} + +while [ "$1" != "" ]; do + case $1 in + -b) + shift + BUILD_ONLY=ON + ;; + -dl) + shift + DOWNLOAD_MODEL=ON + ;; + -mp) + shift + BENCHMARK_MODEL_DIR=$(cd $1; pwd) + shift + ;; + *) + usage + exit 1 + esac +done + +bench_cuda_linux diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.pbxproj b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.pbxproj new file mode 100644 index 0000000..295c6f7 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.pbxproj @@ -0,0 +1,469 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 9D961FEA241163EE009B3FB1 /* BenchmarkListController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 9D961FE9241163EE009B3FB1 /* BenchmarkListController.mm */; }; + 9D961FED24116548009B3FB1 /* RootNavController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 9D961FEB24116548009B3FB1 /* RootNavController.mm */; }; + 9DD579EF23B5A20500A96E63 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 9DD579EE23B5A20500A96E63 /* AppDelegate.m */; }; + 9DD579F223B5A20500A96E63 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 9DD579F123B5A20500A96E63 /* SceneDelegate.m */; }; + 9DD579F523B5A20500A96E63 /* BenchmarkController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 9DD579F423B5A20500A96E63 /* BenchmarkController.mm */; }; + 9DD579F823B5A20500A96E63 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 9DD579F623B5A20500A96E63 /* Main.storyboard */; }; + 9DD579FA23B5A20E00A96E63 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 9DD579F923B5A20E00A96E63 /* Assets.xcassets */; }; + 9DD579FD23B5A20E00A96E63 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 9DD579FB23B5A20E00A96E63 /* LaunchScreen.storyboard */; }; + 9DD57A0023B5A20E00A96E63 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 9DD579FF23B5A20E00A96E63 /* main.m */; }; + 9DD57A0723B5A6BD00A96E63 /* model in Resources */ = {isa = PBXBuildFile; fileRef = 9DD57A0623B5A6BD00A96E63 /* model */; }; + 9DD57A1123B5A8D000A96E63 /* tnn.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9DD57A0D23B5A8C400A96E63 /* tnn.framework */; }; + 9DD57A1523B5ACEB00A96E63 /* CoreML.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9DD57A1423B5ACEB00A96E63 /* CoreML.framework */; }; + 9DD57A1723B5ACF900A96E63 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 
9DD57A1623B5ACF900A96E63 /* Foundation.framework */; }; + 9DD57A1923B5AD0100A96E63 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 9DD57A1823B5AD0100A96E63 /* Accelerate.framework */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 9DD57A0C23B5A8C400A96E63 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 9DD57A0823B5A8C400A96E63 /* tnn.xcodeproj */; + proxyType = 2; + remoteGlobalIDString = 9D2DB1D122D759C8000C508F; + remoteInfo = tnn; + }; + 9DD57A0E23B5A8CB00A96E63 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 9DD57A0823B5A8C400A96E63 /* tnn.xcodeproj */; + proxyType = 1; + remoteGlobalIDString = 9D2DB1D022D759C8000C508F; + remoteInfo = tnn; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 9D961FE8241163EE009B3FB1 /* BenchmarkListController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BenchmarkListController.h; sourceTree = ""; }; + 9D961FE9241163EE009B3FB1 /* BenchmarkListController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = BenchmarkListController.mm; sourceTree = ""; }; + 9D961FEB24116548009B3FB1 /* RootNavController.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = RootNavController.mm; sourceTree = ""; }; + 9D961FEC24116548009B3FB1 /* RootNavController.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RootNavController.h; sourceTree = ""; }; + 9DD579EA23B5A20500A96E63 /* benchmark.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = benchmark.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 9DD579ED23B5A20500A96E63 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; + 
9DD579EE23B5A20500A96E63 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = ""; }; + 9DD579F023B5A20500A96E63 /* SceneDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = SceneDelegate.h; sourceTree = ""; }; + 9DD579F123B5A20500A96E63 /* SceneDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = SceneDelegate.m; sourceTree = ""; }; + 9DD579F323B5A20500A96E63 /* BenchmarkController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = BenchmarkController.h; sourceTree = ""; }; + 9DD579F423B5A20500A96E63 /* BenchmarkController.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = BenchmarkController.mm; sourceTree = ""; }; + 9DD579F723B5A20500A96E63 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; + 9DD579F923B5A20E00A96E63 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 9DD579FC23B5A20E00A96E63 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 9DD579FE23B5A20E00A96E63 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 9DD579FF23B5A20E00A96E63 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; + 9DD57A0623B5A6BD00A96E63 /* model */ = {isa = PBXFileReference; lastKnownFileType = folder; name = model; path = ../../../model; sourceTree = ""; }; + 9DD57A0823B5A8C400A96E63 /* tnn.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = tnn.xcodeproj; path = ../../../platforms/ios/tnn.xcodeproj; sourceTree = ""; }; + 9DD57A1423B5ACEB00A96E63 /* 
CoreML.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreML.framework; path = System/Library/Frameworks/CoreML.framework; sourceTree = SDKROOT; }; + 9DD57A1623B5ACF900A96E63 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; + 9DD57A1823B5AD0100A96E63 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 9DD579E723B5A20500A96E63 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 9DD57A1923B5AD0100A96E63 /* Accelerate.framework in Frameworks */, + 9DD57A1723B5ACF900A96E63 /* Foundation.framework in Frameworks */, + 9DD57A1523B5ACEB00A96E63 /* CoreML.framework in Frameworks */, + 9DD57A1123B5A8D000A96E63 /* tnn.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 9DD579E123B5A20500A96E63 = { + isa = PBXGroup; + children = ( + 9DD579EC23B5A20500A96E63 /* benchmark */, + 9DD579EB23B5A20500A96E63 /* Products */, + 9DD57A1023B5A8D000A96E63 /* Frameworks */, + ); + sourceTree = ""; + }; + 9DD579EB23B5A20500A96E63 /* Products */ = { + isa = PBXGroup; + children = ( + 9DD579EA23B5A20500A96E63 /* benchmark.app */, + ); + name = Products; + sourceTree = ""; + }; + 9DD579EC23B5A20500A96E63 /* benchmark */ = { + isa = PBXGroup; + children = ( + 9DD57A0823B5A8C400A96E63 /* tnn.xcodeproj */, + 9DD57A0623B5A6BD00A96E63 /* model */, + 9DD579ED23B5A20500A96E63 /* AppDelegate.h */, + 9DD579EE23B5A20500A96E63 /* AppDelegate.m */, + 9DD579F023B5A20500A96E63 /* SceneDelegate.h */, + 9DD579F123B5A20500A96E63 /* 
SceneDelegate.m */, + 9D961FEC24116548009B3FB1 /* RootNavController.h */, + 9D961FEB24116548009B3FB1 /* RootNavController.mm */, + 9DD579F323B5A20500A96E63 /* BenchmarkController.h */, + 9DD579F423B5A20500A96E63 /* BenchmarkController.mm */, + 9D961FE8241163EE009B3FB1 /* BenchmarkListController.h */, + 9D961FE9241163EE009B3FB1 /* BenchmarkListController.mm */, + 9DD579F623B5A20500A96E63 /* Main.storyboard */, + 9DD579F923B5A20E00A96E63 /* Assets.xcassets */, + 9DD579FB23B5A20E00A96E63 /* LaunchScreen.storyboard */, + 9DD579FE23B5A20E00A96E63 /* Info.plist */, + 9DD579FF23B5A20E00A96E63 /* main.m */, + ); + path = benchmark; + sourceTree = ""; + }; + 9DD57A0923B5A8C400A96E63 /* Products */ = { + isa = PBXGroup; + children = ( + 9DD57A0D23B5A8C400A96E63 /* tnn.framework */, + ); + name = Products; + sourceTree = ""; + }; + 9DD57A1023B5A8D000A96E63 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 9DD57A1823B5AD0100A96E63 /* Accelerate.framework */, + 9DD57A1623B5ACF900A96E63 /* Foundation.framework */, + 9DD57A1423B5ACEB00A96E63 /* CoreML.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 9DD579E923B5A20500A96E63 /* benchmark */ = { + isa = PBXNativeTarget; + buildConfigurationList = 9DD57A0323B5A20E00A96E63 /* Build configuration list for PBXNativeTarget "benchmark" */; + buildPhases = ( + 9DD579E623B5A20500A96E63 /* Sources */, + 9DD579E723B5A20500A96E63 /* Frameworks */, + 9DD579E823B5A20500A96E63 /* Resources */, + 9DD57A1323B5A91700A96E63 /* ShellScript */, + ); + buildRules = ( + ); + dependencies = ( + 9DD57A0F23B5A8CB00A96E63 /* PBXTargetDependency */, + ); + name = benchmark; + productName = benchmark; + productReference = 9DD579EA23B5A20500A96E63 /* benchmark.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 9DD579E223B5A20500A96E63 /* Project object */ = { + isa = PBXProject; + 
attributes = { + LastUpgradeCheck = 1130; + ORGANIZATIONNAME = tencent; + TargetAttributes = { + 9DD579E923B5A20500A96E63 = { + CreatedOnToolsVersion = 11.3; + }; + }; + }; + buildConfigurationList = 9DD579E523B5A20500A96E63 /* Build configuration list for PBXProject "benchmark" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 9DD579E123B5A20500A96E63; + productRefGroup = 9DD579EB23B5A20500A96E63 /* Products */; + projectDirPath = ""; + projectReferences = ( + { + ProductGroup = 9DD57A0923B5A8C400A96E63 /* Products */; + ProjectRef = 9DD57A0823B5A8C400A96E63 /* tnn.xcodeproj */; + }, + ); + projectRoot = ""; + targets = ( + 9DD579E923B5A20500A96E63 /* benchmark */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXReferenceProxy section */ + 9DD57A0D23B5A8C400A96E63 /* tnn.framework */ = { + isa = PBXReferenceProxy; + fileType = wrapper.framework; + path = tnn.framework; + remoteRef = 9DD57A0C23B5A8C400A96E63 /* PBXContainerItemProxy */; + sourceTree = BUILT_PRODUCTS_DIR; + }; +/* End PBXReferenceProxy section */ + +/* Begin PBXResourcesBuildPhase section */ + 9DD579E823B5A20500A96E63 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 9DD579FD23B5A20E00A96E63 /* LaunchScreen.storyboard in Resources */, + 9DD579FA23B5A20E00A96E63 /* Assets.xcassets in Resources */, + 9DD57A0723B5A6BD00A96E63 /* model in Resources */, + 9DD579F823B5A20500A96E63 /* Main.storyboard in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXShellScriptBuildPhase section */ + 9DD57A1323B5A91700A96E63 /* ShellScript */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + ); + outputFileListPaths = ( + ); + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; 
+ shellScript = "echo $TARGET_BUILD_DIR\ncp $TARGET_BUILD_DIR/tnn.framework/default.metallib $TARGET_BUILD_DIR/$TARGET_NAME.app/tnn.metallib\n"; + }; +/* End PBXShellScriptBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 9DD579E623B5A20500A96E63 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 9DD579F523B5A20500A96E63 /* BenchmarkController.mm in Sources */, + 9DD579EF23B5A20500A96E63 /* AppDelegate.m in Sources */, + 9DD57A0023B5A20E00A96E63 /* main.m in Sources */, + 9DD579F223B5A20500A96E63 /* SceneDelegate.m in Sources */, + 9D961FEA241163EE009B3FB1 /* BenchmarkListController.mm in Sources */, + 9D961FED24116548009B3FB1 /* RootNavController.mm in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 9DD57A0F23B5A8CB00A96E63 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + name = tnn; + targetProxy = 9DD57A0E23B5A8CB00A96E63 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin PBXVariantGroup section */ + 9DD579F623B5A20500A96E63 /* Main.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 9DD579F723B5A20500A96E63 /* Base */, + ); + name = Main.storyboard; + sourceTree = ""; + }; + 9DD579FB23B5A20E00A96E63 /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 9DD579FC23B5A20E00A96E63 /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 9DD57A0123B5A20E00A96E63 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + 
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.2; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + }; + name = Debug; + }; + 9DD57A0223B5A20E00A96E63 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + 
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 13.2; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 9DD57A0423B5A20E00A96E63 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = T7UMVXQMZ6; + INFOPLIST_FILE = benchmark/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + OTHER_LDFLAGS = ( + "-force_load", + "$(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/tnn.framework/tnn", + ); + PRODUCT_BUNDLE_IDENTIFIER = 
com.tencent.youtu.sdk.benchmark; + PRODUCT_NAME = "$(TARGET_NAME)"; + TARGETED_DEVICE_FAMILY = "1,2"; + VALID_ARCHS = arm64; + }; + name = Debug; + }; + 9DD57A0523B5A20E00A96E63 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = T7UMVXQMZ6; + INFOPLIST_FILE = benchmark/Info.plist; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + OTHER_LDFLAGS = ( + "-force_load", + "$(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/tnn.framework/tnn", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.tencent.youtu.sdk.benchmark; + PRODUCT_NAME = "$(TARGET_NAME)"; + TARGETED_DEVICE_FAMILY = "1,2"; + VALID_ARCHS = arm64; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 9DD579E523B5A20500A96E63 /* Build configuration list for PBXProject "benchmark" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9DD57A0123B5A20E00A96E63 /* Debug */, + 9DD57A0223B5A20E00A96E63 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 9DD57A0323B5A20E00A96E63 /* Build configuration list for PBXNativeTarget "benchmark" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 9DD57A0423B5A20E00A96E63 /* Debug */, + 9DD57A0523B5A20E00A96E63 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 9DD579E223B5A20500A96E63 /* Project object */; +} diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000..bb70779 --- /dev/null +++ 
b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000..18d9810 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcshareddata/xcschemes/benchmark.xcscheme b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcshareddata/xcschemes/benchmark.xcscheme new file mode 100644 index 0000000..c9242b1 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcshareddata/xcschemes/benchmark.xcscheme @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcuserdata/darrenyao.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcuserdata/darrenyao.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist new file mode 100644 index 0000000..276b079 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark.xcodeproj/xcuserdata/darrenyao.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -0,0 +1,6 @@ + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.h b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.h new file mode 100644 index 0000000..4667d4c --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.h @@ -0,0 +1,21 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. 
+// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import + +@interface AppDelegate : UIResponder + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.m b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.m new file mode 100644 index 0000000..986fff3 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/AppDelegate.m @@ -0,0 +1,51 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import "AppDelegate.h" + +@interface AppDelegate () +@end + +@implementation AppDelegate +@synthesize window = _window; + +- (BOOL)application:(UIApplication *)application +didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + // Override point for customization after application launch. 
+ return YES; +} + + +#pragma mark - UISceneSession lifecycle + + +- (UISceneConfiguration *)application:(UIApplication *)application +configurationForConnectingSceneSession:(UISceneSession *)connectingSceneSession + options:(UISceneConnectionOptions *)options { + // Called when a new scene session is being created. + // Use this method to select a configuration to create the new scene with. + return [[UISceneConfiguration alloc] initWithName:@"Default Configuration" + sessionRole:connectingSceneSession.role]; +} + + +- (void)application:(UIApplication *)application +didDiscardSceneSessions:(NSSet *)sceneSessions { + // Called when the user discards a scene session. + // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. + // Use this method to release any resources that were specific to the discarded scenes, as they will not return. +} + + +@end diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/AppIcon.appiconset/Contents.json b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 0000000..d8db8d6 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,98 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "3x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" 
: "20x20", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "83.5x83.5", + "scale" : "2x" + }, + { + "idiom" : "ios-marketing", + "size" : "1024x1024", + "scale" : "1x" + } + ], + "info" : { + "version" : 1, + "author" : "xcode" + } +} \ No newline at end of file diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/Contents.json b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/Contents.json new file mode 100644 index 0000000..da4a164 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "version" : 1, + "author" : "xcode" + } +} \ No newline at end of file diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/LaunchScreen.storyboard b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 0000000..865e932 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/Main.storyboard b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/Main.storyboard new file mode 100644 index 0000000..51b1a07 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Base.lproj/Main.storyboard @@ -0,0 +1,111 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.h b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.h new file mode 100644 index 0000000..f3ea6d6 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.h @@ -0,0 +1,21 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import + +@interface BenchmarkController : UIViewController + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.mm b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.mm new file mode 100644 index 0000000..bf1bf8d --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkController.mm @@ -0,0 +1,294 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import "BenchmarkController.h" +#import +#include +#include +#include +#include +#include + +using namespace std; +using namespace TNN_NS; + +struct BenchModel { + string name; + string tnn_proto_content; + string tnn_model_content; + string coreml; +}; + +struct BenchOption { + int warm_count = 10; + int forward_count = 20; + int create_count = 1; + + string description() { + ostringstream ostr; + ostr << "create_count = " << create_count + << " warm_count = " << warm_count + << " forward_count = " << forward_count; + + ostr << std::endl; + return ostr.str(); + }; +}; + +struct BenchResult { + Status status; + + //time + float min = FLT_MAX; + float max = FLT_MIN; + float avg = 0; + float total = 0; + int count = 0; + + float diff = 0; + + int addTime(float time){ + count++; + total += time; + min = std::min(min, time); + max = std::max(max, time); + avg = total/count; + return 0; + }; + + string description() { + ostringstream ostr; + ostr << "min = " << min << " max = " << max << " avg = " < netmodels; + + for (NSString *modelDir in modelList) { +// if (![modelDir hasPrefix:@"mobilenetv1-ssd"]) { +// continue; +// } + NSString *modelDirPath = [modelZone stringByAppendingPathComponent:modelDir]; + BOOL isDirectory = NO; + + if ([[NSFileManager defaultManager] fileExistsAtPath:modelDirPath + isDirectory:&isDirectory]) { + if (!isDirectory) { + continue; + } + + BenchModel model; + model.name = modelDir.UTF8String; + + NSArray *modelFiles = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:modelDirPath + error:nil]; + NSArray *protos = 
[modelFiles filteredArrayUsingPredicate:predicateProto]; + if (protos.count > 0) { + auto proto = [NSString stringWithContentsOfFile:[modelDirPath stringByAppendingPathComponent:protos[0]] + encoding:NSUTF8StringEncoding + error:nil]; + if (proto.length > 0) { + model.tnn_proto_content = proto.UTF8String; + } + } + NSArray *models = [modelFiles filteredArrayUsingPredicate:predicateModel]; + if (models.count > 0) { +// model.tnn_model_content = [modelDirPath stringByAppendingPathComponent:models[0]].UTF8String; + NSData *data = [NSData dataWithContentsOfFile:[modelDirPath + stringByAppendingPathComponent:models[0]]]; + model.tnn_model_content = string((const char *)[data bytes], [data length]); + } + NSArray *coremls = [modelFiles filteredArrayUsingPredicate:predicateCoreML]; + if (coremls.count > 0) { + model.coreml = [modelDirPath stringByAppendingPathComponent:coremls[0]].UTF8String; + } + netmodels.push_back(model); + } + } + return netmodels; +} + +- (IBAction)onBtnBenchmark:(id)sender { + //check release mode at Product->Scheme when running + //运行时请在Product->Scheme中确认意见调整到release模式 + + //搜索model目录下的所有模型 + auto allModels = [self getAllModels]; + + BenchOption option; + option.warm_count = 5; + option.forward_count = 10; + option.create_count = 1; + + //Get metallib path from app bundle + //PS:A script(Build Phases -> Run Script) is added to copy the metallib file in tnn framework project to benchmark app + //注意:此工程添加了脚本将tnn工程生成的tnn.metallib自动复制到app内 + auto pathLibrary = [[NSBundle mainBundle] pathForResource:@"tnn.metallib" + ofType:nil]; + pathLibrary = pathLibrary ? 
pathLibrary : @""; + + NSString *allResult = [NSString string]; + for (auto model : allModels) { + NSLog(@"model: %s", model.name.c_str()); + allResult = [allResult stringByAppendingFormat:@"model: %s\n", model.name.c_str()]; + + //benchmark on arm cpu + auto result_arm = [self benchmarkWithProtoContent:model.tnn_proto_content + model:model.tnn_model_content + coreml:model.coreml + library:pathLibrary.UTF8String + netType:NETWORK_TYPE_DEFAULT + deviceType:DEVICE_ARM + option:option]; + NSLog(@"arm: \ntime: %s", result_arm.description().c_str()); + allResult = [allResult stringByAppendingFormat:@"arm: \ntime: %s", + result_arm.description().c_str()]; + + + //benchmark on gpu + auto result_gpu = [self benchmarkWithProtoContent:model.tnn_proto_content + model:model.tnn_model_content + coreml:model.coreml + library:pathLibrary.UTF8String + netType:NETWORK_TYPE_DEFAULT + deviceType:DEVICE_METAL + option:option]; + NSLog(@"gpu: \ntime: %s", result_gpu.description().c_str()); + allResult = [allResult stringByAppendingFormat:@"gpu: \ntime: %s\n", + result_gpu.description().c_str()]; + } + + self.textViewResult.text = allResult; +} + +- (BenchResult)benchmarkWithProtoContent:(string)protoContent + model:(string)modelPathOrContent + coreml:(string)coremlDir + library:(string)metallibPath + netType:(NetworkType)net_type + deviceType:(DeviceType)device_type + option:(BenchOption)option { + BenchResult result; + + net_type = net_type == NETWORK_TYPE_COREML ? 
NETWORK_TYPE_COREML : NETWORK_TYPE_DEFAULT; + + //network init + //网络初始化 + TNN net; + { + ModelConfig config; + if (net_type == NETWORK_TYPE_COREML) { + config.model_type = MODEL_TYPE_COREML; + config.params = {coremlDir}; + } else { + config.model_type = MODEL_TYPE_TNN; + config.params = {protoContent, modelPathOrContent}; + } + + if (net_type == NETWORK_TYPE_COREML) { + config.model_type = MODEL_TYPE_COREML; + } + + result.status = net.Init(config); + if (result.status != TNN_OK) { + NSLog(@"net.Init Error: %s", result.status.description().c_str()); + return result; + } + } + + //create instance + //创建实例instance + std::shared_ptr instance = nullptr; + { + NetworkConfig network_config; + network_config.network_type = net_type; + network_config.library_path = {metallibPath}; + network_config.device_type = device_type; + instance = net.CreateInst(network_config, result.status); + if (result.status != TNN_OK || !instance) { + NSLog(@"net.CreateInst Error: %s", result.status.description().c_str()); + return result; + } + } + + //warm cpu, only used when benchmark + for (int cc=0; ccForward(); + if (result.status != TNN_OK) { + NSLog(@"instance.Forward Error: %s", result.status.description().c_str()); + return result; + } + } + + //inference + //前向推断 + bool profile_layer_time = false; +#if TNN_PROFILE + if (profile_layer_time) { + instance->StartProfile(); + } +#endif + for (int cc=0; ccForward(); + + gettimeofday(&tv_end, NULL); + double elapsed = (tv_end.tv_sec - tv_begin.tv_sec) * 1000.0 + (tv_end.tv_usec - tv_begin.tv_usec) / 1000.0; + result.addTime(elapsed); + } +#if TNN_PROFILE + if (profile_layer_time) { + instance->FinishProfile(true); + } +#endif + + return result; +} + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.h b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.h new file mode 100644 index 0000000..86ae0cd --- /dev/null +++ 
b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.h @@ -0,0 +1,21 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import + +@interface BenchmarkListController : UITableViewController + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.mm b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.mm new file mode 100644 index 0000000..a4a1c31 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/BenchmarkListController.mm @@ -0,0 +1,42 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#import "BenchmarkListController.h" +#import +#include +#include +#include +#include +#include + +using namespace std; +using namespace TNN_NS; + + +@interface BenchmarkListController () { +} +@end + +@implementation BenchmarkListController + +- (void)viewDidLoad { + [super viewDidLoad]; + // Do any additional setup after loading the view. + + +} + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Info.plist b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Info.plist new file mode 100644 index 0000000..7b6037c --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/Info.plist @@ -0,0 +1,64 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UIApplicationSceneManifest + + UIApplicationSupportsMultipleScenes + + UISceneConfigurations + + UIWindowSceneSessionRoleApplication + + + UISceneConfigurationName + Default Configuration + UISceneDelegateClassName + SceneDelegate + UISceneStoryboardFile + Main + + + + + UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.h b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.h new file mode 100644 index 0000000..a21668e --- /dev/null +++ 
b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.h @@ -0,0 +1,21 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#import + +@interface RootNavController : UINavigationController + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.mm b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.mm new file mode 100644 index 0000000..5781b2a --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/RootNavController.mm @@ -0,0 +1,32 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#import "RootNavController.h" + +@interface RootNavController () { +} +@end + +@implementation RootNavController + +- (void)viewDidLoad { + [super viewDidLoad]; + // Do any additional setup after loading the view. + + +} + + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.h b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.h new file mode 100644 index 0000000..5758a87 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.h @@ -0,0 +1,22 @@ +// Tencent is pleased to support the open source community by making TNN available. +// +// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved. +// +// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// https://opensource.org/licenses/BSD-3-Clause +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +#import + +@interface SceneDelegate : UIResponder + +@property (strong, nonatomic) UIWindow * window; + +@end + diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.m b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.m new file mode 100644 index 0000000..62b8666 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/SceneDelegate.m @@ -0,0 +1,52 @@ +#import "SceneDelegate.h" + +@interface SceneDelegate () + +@end + +@implementation SceneDelegate + + +- (void)scene:(UIScene *)scene +willConnectToSession:(UISceneSession *)session + options:(UISceneConnectionOptions *)connectionOptions { + // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`. + // If using a storyboard, the `window` property will automatically be initialized and attached to the scene. + // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead). +} + + +- (void)sceneDidDisconnect:(UIScene *)scene { + // Called as the scene is being released by the system. + // This occurs shortly after the scene enters the background, or when its session is discarded. + // Release any resources associated with this scene that can be re-created the next time the scene connects. + // The scene may re-connect later, as its session was not neccessarily discarded (see `application:didDiscardSceneSessions` instead). +} + + +- (void)sceneDidBecomeActive:(UIScene *)scene { + // Called when the scene has moved from an inactive state to an active state. + // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive. +} + + +- (void)sceneWillResignActive:(UIScene *)scene { + // Called when the scene will move from an active state to an inactive state. + // This may occur due to temporary interruptions (ex. an incoming phone call). 
+} + + +- (void)sceneWillEnterForeground:(UIScene *)scene { + // Called as the scene transitions from the background to the foreground. + // Use this method to undo the changes made on entering the background. +} + + +- (void)sceneDidEnterBackground:(UIScene *)scene { + // Called as the scene transitions from the foreground to the background. + // Use this method to save data, release shared resources, and store enough scene-specific state information + // to restore the scene back to its current state. +} + + +@end diff --git a/3rdparty/TNN/benchmark/benchmark_ios/benchmark/main.m b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/main.m new file mode 100644 index 0000000..3bd5764 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_ios/benchmark/main.m @@ -0,0 +1,12 @@ + +#import +#import "AppDelegate.h" + +int main(int argc, char * argv[]) { + NSString * appDelegateClassName; + @autoreleasepool { + // Setup code that might create autoreleased objects goes here. + appDelegateClassName = NSStringFromClass([AppDelegate class]); + } + return UIApplicationMain(argc, argv, nil, appDelegateClassName); +} diff --git a/3rdparty/TNN/benchmark/benchmark_linux/benchmark_layer.sh b/3rdparty/TNN/benchmark/benchmark_linux/benchmark_layer.sh new file mode 100755 index 0000000..8080717 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_linux/benchmark_layer.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +CLEAN="" +WORK_DIR=`pwd` +FILTER="" +DEVICE_TYPE="" +KERNEL_TUNE="-et" +BUILD_DIR=build +OUTPUT_LOG_FILE=benchmark_layer_result.txt +LOOP_COUNT=10 + +function usage() { + echo "usage: ./benchmark_layer.sh [-32] [-c] [-f] [-d] [-t] " + echo "options:" + echo " -32 Build 32 bit." + echo " -c Clean up build folders." 
+ echo " -d run with specified device" + echo " -f specified layer" + echo " -t CPU/GPU specify the platform to run" + echo " -et/-noet set kernel enable tune on or off" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function clean_build() { + echo $1 | grep "$BUILD_DIR\b" > /dev/null + if [[ "$?" != "0" ]]; then + exit_with_msg "Warnning: $1 seems not to be a BUILD folder." + fi + rm -rf $1 + mkdir $1 +} + +function build_linux_bench() { + if [ "-c" == "$CLEAN" ]; then + clean_build $BUILD_DIR + fi + mkdir -p build + cd $BUILD_DIR + cmake ../../.. \ + -DCMAKE_BUILD_TYPE=Release \ + -DTNN_ARM_ENABLE:BOOL=ON \ + -DTNN_OPENCL_ENABLE:BOOL=ON \ + -DTNN_TEST_ENABLE:BOOL=ON \ + -DTNN_BENCHMARK_MODE:BOOL=ON \ + -DTNN_UNIT_TEST_ENABLE:BOOL=ON \ + -DTNN_UNIT_TEST_BENCHMARK:BOOL=ON \ + -DTNN_PROFILER_ENABLE:BOOL=ON + make -j4 +} + +function bench_android() { + build_linux_bench + + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then + DEVICE_TYPE="" + fi + + echo 'layer benchmark' 2>&1 |tee $WORK_DIR/$OUTPUT_LOG_FILE + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + echo 'benchmark device: ARM' 2>&1 |tee -a $WORK_DIR/$OUTPUT_LOG_FILE + cd ${WORK_DIR}; LD_LIBRARY_PATH=. ./build/test/unit_test/unit_test ${KERNEL_TUNE} -ic $LOOP_COUNT -dt ARM --gtest_filter="*${FILTER}*" -ub 2>&1 |tee -a $WORK_DIR/$OUTPUT_LOG_FILE + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ];then + LOOP_COUNT=1 + echo 'benchmark device: OPENCL' 2>&1 |tee -a $WORK_DIR/$OUTPUT_LOG_FILE + cd ${WORK_DIR}; LD_LIBRARY_PATH=. 
./build/test/unit_test/unit_test ${KERNEL_TUNE} -ic $LOOP_COUNT -dt OPENCL --gtest_filter="*${FILTER}*" -ub 2>&1 |tee -a $WORK_DIR/$OUTPUT_LOG_FILE + fi +} + +while [ "$1" != "" ]; do + case $1 in + -c) + shift + CLEAN="-c" + ;; + -f) + shift + FILTER=$1 + shift + ;; + -t) + shift + DEVICE_TYPE="$1" + shift + ;; + *) + usage + exit 1 + esac +done + +bench_android diff --git a/3rdparty/TNN/benchmark/benchmark_linux/benchmark_models.sh b/3rdparty/TNN/benchmark/benchmark_linux/benchmark_models.sh new file mode 100755 index 0000000..23e8b5f --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_linux/benchmark_models.sh @@ -0,0 +1,159 @@ +#!/bin/bash + +PROFILING="OFF" +CLEAN="" +DEVICE_TYPE="" +MODEL_TYPE=TNN +USE_NCNN_MODEL=0 +SHARED_LIB="ON" +OPENCL="ON" + +if [ -z $TNN_ROOT_PATH ] +then + TNN_ROOT_PATH=$(cd `dirname $0`; pwd)/../.. +fi + +WORK_DIR=`pwd` +BENCHMARK_MODEL_DIR=$WORK_DIR/../benchmark-model +BUILD_DIR=build +OUTPUT_LOG_FILE=benchmark_models_result.txt +LOOP_COUNT=20 +WARM_UP_COUNT=10 + +benchmark_model_list=( +#test.tnnproto \ +) + +function usage() { + echo "usage: ./benchmark_models.sh [-32] [-c] [-b] [-f] [-t] " + echo "options:" + echo " -32 Build 32 bit." + echo " -c Clean up build folders." + echo " -b build targets only" + echo " -f build profiling targets " + echo " -t CPU/GPU specify the platform to run" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function clean_build() { + echo $1 | grep "$BUILD_DIR\b" > /dev/null + if [[ "$?" != "0" ]]; then + exit_with_msg "Warnning: $1 seems not to be a BUILD folder." 
+ fi + rm -rf $1 + mkdir $1 +} + +function build_bench() { + if [ "-c" == "$CLEAN" ]; then + clean_build $BUILD_DIR + fi + mkdir -p build + cd $BUILD_DIR + cmake ${TNN_ROOT_PATH} \ + -DCMAKE_BUILD_TYPE=Release \ + -DTNN_CPU_ENABLE=OFF \ + -DTNN_X86_ENABLE=ON \ + -DTNN_OPENCL_ENABLE:BOOL=$OPENCL \ + -DTNN_PROFILER_ENABLE:BOOL=${PROFILING} \ + -DTNN_TEST_ENABLE=ON \ + -DTNN_BUILD_SHARED:BOOL=$SHARED_LIB \ + -DTNN_BENCHMARK_MODE=ON \ + -DINTTYPES_FORMAT=C99 + + make -j4 +} + +function bench_linux() { + build_bench + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + if [ "" != "$BUILD_ONLY" ]; then + echo "build done!" + exit 0 + fi + + cd ${BENCHMARK_MODEL_DIR} + + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" != "GPU" ] && [ "$DEVICE_TYPE" != "CPU" ];then + DEVICE_TYPE="" + fi + + echo "benchmark log:" > $WORK_DIR/log_temp.txt + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + device=X86 + echo "benchmark device: ${device} " >> $WORK_DIR/log_temp.txt + + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=. ./build/test/TNNTest -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} >> log_temp.txt + done + fi + + if [ "ON" == $PROFILING ]; then + WARM_UP_COUNT=5 + LOOP_COUNT=1 + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "GPU" ];then + device=OPENCL + echo "benchmark device: ${device} " >> $WORK_DIR/log_temp.txt + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=. 
./build/test/TNNTest -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} -et >> log_temp.txt + done + fi + + cat $WORK_DIR/log_temp.txt |grep "time cost:" > $WORK_DIR/$OUTPUT_LOG_FILE + echo '' >> $OUTPUT_LOG_FILE + date >> $OUTPUT_LOG_FILE + + cat ${WORK_DIR}/$OUTPUT_LOG_FILE +} + +while [ "$1" != "" ]; do + case $1 in + -32) + shift + CC=arm-linux-gnueabihf-gcc + CXX=arm-linux-gnueabihf-g++ + TARGET_ARCH=arm + ;; + -c) + shift + CLEAN="-c" + ;; + -b) + shift + BUILD_ONLY="-b" + ;; + -f) + shift + PROFILING="ON" + ;; + -t) + shift + DEVICE_TYPE="$1" + shift + ;; + -n) + shift + MODEL_TYPE=NCNN + ;; + *) + usage + exit 1 + esac +done + +bench_linux diff --git a/3rdparty/TNN/benchmark/benchmark_windows/benchmark_models.bat b/3rdparty/TNN/benchmark/benchmark_windows/benchmark_models.bat new file mode 100644 index 0000000..6ffe3b7 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_windows/benchmark_models.bat @@ -0,0 +1,50 @@ +set TNN_DIR=%~dp0..\..\ + +@echo off +echo %TNN_DIR% +echo %1 + +if "%2" == "" ( + goto init_fold +) else ( + goto init_env +) + +:init_env + if %1 == x86 ( + echo "build x86" + call "D:\Microsoft\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvars32.bat" + ) else ( + echo "build x64" + call "D:\Microsoft\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvars64.bat" + ) + goto init_fold + +:init_fold + mkdir build_win + cd build_win + +cmake %TNN_DIR% -G "Ninja" ^ +-DCMAKE_BUILD_TYPE=Release ^ +-DCMAKE_SYSTEM_NAME=Windows ^ +-DTNN_CPU_ENABLE=ON ^ +-DTNN_OPENCL_ENABLE=ON ^ +-DTNN_X86_ENABLE=ON ^ +-DTNN_TEST_ENABLE=ON ^ +-DTNN_BENCHMARK_MODE=ON ^ +-DINTTYPES_FORMAT=C99 + +cmake --build . 
--config Release + +copy TNN.dll test\ +cd test +echo "Windows Benchmark" > log_temp.txt +FOR %%C IN (..\..\..\benchmark-model\*.tnnproto) DO (.\TNNTest.exe -wc 10 -ic 20 -mp %%C -mt TNN -dt OPENCL >> log_temp.txt) + +echo "Windows Benchmark" > result.txt +FOR /f "delims=] tokens=3" %%a IN (log_temp.txt) DO ( +echo "%%a"|find "time cost:" && echo %%a >>result.txt +) + +del log_temp.txt +copy result.txt ..\..\ \ No newline at end of file diff --git a/3rdparty/TNN/benchmark/benchmark_x86_linux/benchmark_models.sh b/3rdparty/TNN/benchmark/benchmark_x86_linux/benchmark_models.sh new file mode 100755 index 0000000..3832191 --- /dev/null +++ b/3rdparty/TNN/benchmark/benchmark_x86_linux/benchmark_models.sh @@ -0,0 +1,180 @@ +#!/bin/bash + +MODEL_TYPE=TNN +NETWORK_TYPE=OPENVINO +NUM_THREAD=4 +BUILD_ONLY="OFF" +DOWNLOAD_MODEL="OFF" + +if [ -z $TNN_ROOT_PATH ] +then + TNN_ROOT_PATH=$(cd `dirname $0`; pwd)/../.. +fi + +WORK_DIR=`pwd` +BENCHMARK_MODEL_DIR=$WORK_DIR/benchmark_model +OUTPUT_LOG_FILE=benchmark_models_result.txt +LOOP_COUNT=20 +WARM_UP_COUNT=5 + +benchmark_model_list=( +#test.tnnproto \ +) + +#URL, local path +function download_file() { #URL, path + if [ -e $2 ]; then return 0; fi + + name=`basename $2` + echo "downloading $name ..." + # status=`wget $1 -o $2` + status=`curl $1 -s -w %{http_code} -o $2` + if (( status == 200 )); then + return 0 + else + echo "download $name failed" 1>&2 + return -1 + fi +} + +#URL proto, URL model, directory +function download_model() { + directory="./$3" + if [ ! -e ${directory} ]; then + mkdir -p ${directory} + fi + + proto_name=`basename $1` + proto_path_local="${directory}/${proto_name}" + if [ ! -f ${proto_path_local} ]; then + download_file $1 $proto_path_local + succ=$? + if [ ! $succ -eq 0 ]; then + echo "please download model manually!!!(url:https://github.com/darrenyao87/tnn-models/tree/master/model)" + rm -r ${directory} + fi + fi + + model_name=`basename $2` + model_path_local="${directory}/${model_name}" + if [ ! 
-f ${model_path_local} ]; then + download_file $2 $model_path_local + succ=$? + if [ ! $succ -eq 0 ]; then + echo "please download model manually!!!(url:https://github.com/darrenyao87/tnn-models/tree/master/model)" + rm -r ${directory} + fi + fi +} + +function download_bench_model() { + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/resnet50/resnet50.opt.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/resnet50/resnet50.opt.tnnmodel" \ + benchmark_model + + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/bert-based/bert-based.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/bert-based/bert-based.tnnmodel" \ + benchmark_model + + download_model \ + "https://raw.githubusercontent.com/darrenyao87/tnn-models/master/model/bertsquad10/bertsquad10_clean.tnnproto" \ + "https://media.githubusercontent.com/media/darrenyao87/tnn-models/master/model/bertsquad10/bertsquad10_clean.tnnmodel" \ + benchmark_model +} + +function usage() { + echo "usage: ./benchmark_models.sh [-th] [-b] [-dl] [-mp] [-native]" + echo "options:" + echo " -th thread num, defalut 1" + echo " -b build only " + echo " -dl download model from github " + echo " -mp model dir path" + echo " -native bench with native optimization" +} + +function exit_with_msg() { + echo $1 + exit 1 +} + +function build_x86_linux_bench() { + cd $TNN_ROOT_PATH/scripts + ./build_x86_linux.sh + cp $TNN_ROOT_PATH/scripts/x86_linux_release $TNN_ROOT_PATH/benchmark/benchmark_x86_linux/ -r +} + +function bench_x86_linux() { + if [ "OFF" != "$DOWNLOAD_MODEL" ];then + download_bench_model + fi + + build_x86_linux_bench + if [ $? != 0 ];then + exit_with_msg "build failed" + fi + + if [ "OFF" != "$BUILD_ONLY" ]; then + echo "build done!" + exit 0 + fi + + if [ ! 
-d ${BENCHMARK_MODEL_DIR} ]; then + echo "please set model dir path or exec script with option -dl" + usage + exit -1 + fi + cd ${BENCHMARK_MODEL_DIR} + + if [ ${#benchmark_model_list[*]} == 0 ];then + benchmark_model_list=`ls *.tnnproto` + fi + + if [ "$DEVICE_TYPE" = "" ] || [ "$DEVICE_TYPE" = "CPU" ];then + device=X86 + echo "benchmark device: ${device} " >> $WORK_DIR/$OUTPUT_LOG_FILE + + for benchmark_model in ${benchmark_model_list[*]} + do + cd ${WORK_DIR}; LD_LIBRARY_PATH=x86_linux_release/lib ./x86_linux_release/bin/TNNTest -th ${NUM_THREAD} -wc ${WARM_UP_COUNT} -ic ${LOOP_COUNT} -dt ${device} -mt ${MODEL_TYPE} -nt ${NETWORK_TYPE} -mp ${BENCHMARK_MODEL_DIR}/${benchmark_model} >> $OUTPUT_LOG_FILE + done + fi + + echo '' >> $OUTPUT_LOG_FILE + date >> $OUTPUT_LOG_FILE + + cat ${WORK_DIR}/$OUTPUT_LOG_FILE +} + +while [ "$1" != "" ]; do + case $1 in + -native) + shift + NETWORK_TYPE=DEFAULT + ;; + -th) + shift + NUM_THREAD="$1" + shift + ;; + -b) + shift + BUILD_ONLY=ON + ;; + -dl) + shift + DOWNLOAD_MODEL=ON + ;; + -mp) + shift + BENCHMARK_MODEL_DIR=$(cd $1; pwd) + shift + ;; + *) + usage + exit 1 + esac +done + +bench_x86_linux diff --git a/3rdparty/TNN/build_linux_native/libTNN.so.0 b/3rdparty/TNN/build_linux_native/libTNN.so.0 new file mode 100755 index 0000000..a7d6f0d Binary files /dev/null and b/3rdparty/TNN/build_linux_native/libTNN.so.0 differ diff --git a/3rdparty/TNN/build_linux_native/libTNN.so.0.1.0.0 b/3rdparty/TNN/build_linux_native/libTNN.so.0.1.0.0 new file mode 100755 index 0000000..a7d6f0d Binary files /dev/null and b/3rdparty/TNN/build_linux_native/libTNN.so.0.1.0.0 differ diff --git a/3rdparty/TNN/build_linux_native/test/TNNTest b/3rdparty/TNN/build_linux_native/test/TNNTest new file mode 100755 index 0000000..b8ee8e0 Binary files /dev/null and b/3rdparty/TNN/build_linux_native/test/TNNTest differ diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-install.cmake 
b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-install.cmake new file mode 100644 index 0000000..4629652 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-install.cmake @@ -0,0 +1,169 @@ +## gflags CMake configuration file + +# library version information +set (GFLAGS_VERSION_STRING "2.2.1") +set (GFLAGS_VERSION_MAJOR 2) +set (GFLAGS_VERSION_MINOR 2) +set (GFLAGS_VERSION_PATCH 0) + +# import targets +include ("${CMAKE_CURRENT_LIST_DIR}/gflags-targets.cmake") + +# installation prefix +get_filename_component (CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component (_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) + +# include directory +# +# Newer versions of CMake set the INTERFACE_INCLUDE_DIRECTORIES property +# of the imported targets. It is hence not necessary to add this path +# manually to the include search path for targets which link to gflags. +set (GFLAGS_INCLUDE_DIR "${_INSTALL_PREFIX}/include") + +if (gflags_FIND_COMPONENTS) + foreach (gflags_FIND_COMPONENT IN LISTS gflags_FIND_COMPONENTS) + if (gflags_FIND_REQUIRED_${gflags_FIND_COMPONENT} AND NOT TARGET gflags_${gflags_FIND_COMPONENT}) + message (FATAL_ERROR "Package gflags was installed without required component ${gflags_FIND_COMPONENT}!") + endif () + endforeach () + list (GET gflags_FIND_COMPONENTS 0 gflags_FIND_COMPONENT) +else () + set (gflags_FIND_COMPONENT) +endif () + +# default settings of GFLAGS_SHARED and GFLAGS_NOTHREADS +# +# It is recommended to use either one of the following find_package commands +# instead of setting the GFLAGS_(SHARED|NOTHREADS) variables: +# - find_package(gflags REQUIRED) +# - find_package(gflags COMPONENTS nothreads_static) +# - find_package(gflags COMPONENTS nothreads_shared) +# - find_package(gflags COMPONENTS static) +# - find_package(gflags COMPONENTS shared) +if (NOT DEFINED GFLAGS_SHARED) + if (DEFINED gflags_SHARED) + set (GFLAGS_SHARED ${gflags_SHARED}) + elseif 
(gflags_FIND_COMPONENT) + if (gflags_FIND_COMPONENT MATCHES "shared") + set (GFLAGS_SHARED TRUE) + else () + set (GFLAGS_SHARED FALSE) + endif () + elseif (TARGET gflags_shared OR TARGET gflags_nothreads_shared) + set (GFLAGS_SHARED TRUE) + else () + set (GFLAGS_SHARED FALSE) + endif () +endif () +if (NOT DEFINED GFLAGS_NOTHREADS) + if (DEFINED gflags_NOTHREADS) + set (GFLAGS_NOTHREADS ${gflags_NOTHREADS}) + elseif (gflags_FIND_COMPONENT) + if (gflags_FIND_COMPONENT MATCHES "nothreads") + set (GFLAGS_NOTHREADS TRUE) + else () + set (GFLAGS_NOTHREADS FALSE) + endif () + elseif (TARGET gflags_static OR TARGET gflags_shared) + set (GFLAGS_NOTHREADS FALSE) + else () + set (GFLAGS_NOTHREADS TRUE) + endif () +endif () + +# choose imported library target +if (NOT GFLAGS_TARGET) + if (gflags_TARGET) + set (GFLAGS_TARGET ${gflags_TARGET}) + elseif (GFLAGS_SHARED) + if (GFLAGS_NOTHREADS) + set (GFLAGS_TARGET gflags_nothreads_shared) + else () + set (GFLAGS_TARGET gflags_shared) + endif () + else () + if (GFLAGS_NOTHREADS) + set (GFLAGS_TARGET gflags_nothreads_static) + else () + set (GFLAGS_TARGET gflags_static) + endif () + endif () +endif () +if (NOT TARGET ${GFLAGS_TARGET}) + message (FATAL_ERROR "Your gflags installation does not contain a ${GFLAGS_TARGET} library target!" 
+ " Try a different combination of GFLAGS_SHARED and GFLAGS_NOTHREADS.") +endif () + +# add more convenient "gflags" import target +if (NOT TARGET gflags) + if (GFLAGS_SHARED) + add_library (gflags SHARED IMPORTED) + else () + add_library (gflags STATIC IMPORTED) + endif () + # copy INTERFACE_* properties + foreach (_GFLAGS_PROPERTY_NAME IN ITEMS + COMPILE_DEFINITIONS + COMPILE_FEATURES + COMPILE_OPTIONS + INCLUDE_DIRECTORIES + LINK_LIBRARIES + POSITION_INDEPENDENT_CODE + ) + get_target_property (_GFLAGS_PROPERTY_VALUE ${GFLAGS_TARGET} INTERFACE_${_GFLAGS_PROPERTY_NAME}) + if (_GFLAGS_PROPERTY_VALUE) + set_target_properties(gflags PROPERTIES + INTERFACE_${_GFLAGS_PROPERTY_NAME} "${_GFLAGS_PROPERTY_VALUE}" + ) + endif () + endforeach () + # copy IMPORTED_*_ properties + get_target_property (_GFLAGS_CONFIGURATIONS ${GFLAGS_TARGET} IMPORTED_CONFIGURATIONS) + set_target_properties (gflags PROPERTIES IMPORTED_CONFIGURATIONS "${_GFLAGS_CONFIGURATIONS}") + foreach (_GFLAGS_PROPERTY_NAME IN ITEMS + IMPLIB + LOCATION + LINK_DEPENDENT_LIBRARIES + LINK_INTERFACE_LIBRARIES + LINK_INTERFACE_LANGUAGES + LINK_INTERFACE_MULTIPLICITY + NO_SONAME + SONAME + ) + foreach (_GFLAGS_CONFIG IN LISTS _GFLAGS_CONFIGURATIONS) + get_target_property (_GFLAGS_PROPERTY_VALUE ${GFLAGS_TARGET} IMPORTED_${_GFLAGS_PROPERTY_NAME}_${_GFLAGS_CONFIG}) + if (_GFLAGS_PROPERTY_VALUE) + set_target_properties(gflags PROPERTIES + IMPORTED_${_GFLAGS_PROPERTY_NAME}_${_GFLAGS_CONFIG} "${_GFLAGS_PROPERTY_VALUE}" + ) + endif () + endforeach () + endforeach () + unset (_GFLAGS_CONFIGURATIONS) + unset (_GFLAGS_CONFIG) + unset (_GFLAGS_PROPERTY_NAME) + unset (_GFLAGS_PROPERTY_VALUE) +endif () + +# alias for default import target to be compatible with older CMake package configurations +set (GFLAGS_LIBRARIES "${GFLAGS_TARGET}") + +# set gflags_* variables for backwards compatibility +if (NOT "^gflags$" STREQUAL "^GFLAGS$") + foreach (_GFLAGS_VARIABLE IN ITEMS + VERSION_STRING + VERSION_MAJOR + VERSION_MINOR + 
VERSION_PATCH + INCLUDE_DIR + LIBRARIES + TARGET + ) + set (gflags_${_GFLAGS_VARIABLE} "${GFLAGS_${_GFLAGS_VARIABLE}}") + endforeach () + unset (_GFLAGS_VARIABLE) +endif () + +# unset private variables +unset (gflags_FIND_COMPONENT) +unset (_INSTALL_PREFIX) diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-version.cmake b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-version.cmake new file mode 100644 index 0000000..d68a39f --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config-version.cmake @@ -0,0 +1,21 @@ +## gflags CMake configuration version file + +# ----------------------------------------------------------------------------- +# library version +set (PACKAGE_VERSION "2.2.1") + +# ----------------------------------------------------------------------------- +# check compatibility + +# Perform compatibility check here using the input CMake variables. +# See example in http://www.cmake.org/Wiki/CMake_2.6_Notes. 
+ +set (PACKAGE_VERSION_COMPATIBLE TRUE) +set (PACKAGE_VERSION_UNSUITABLE FALSE) + +if ("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "2" AND + "${PACKAGE_FIND_VERSION_MINOR}" EQUAL "2") + set (PACKAGE_VERSION_EXACT TRUE) +else () + set (PACKAGE_VERSION_EXACT FALSE) +endif () diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config.cmake b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config.cmake new file mode 100644 index 0000000..82b5837 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-config.cmake @@ -0,0 +1,169 @@ +## gflags CMake configuration file + +# library version information +set (GFLAGS_VERSION_STRING "2.2.1") +set (GFLAGS_VERSION_MAJOR 2) +set (GFLAGS_VERSION_MINOR 2) +set (GFLAGS_VERSION_PATCH 0) + +# import targets +include ("${CMAKE_CURRENT_LIST_DIR}/gflags-targets.cmake") + +# installation prefix +get_filename_component (CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component (_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/." ABSOLUTE) + +# include directory +# +# Newer versions of CMake set the INTERFACE_INCLUDE_DIRECTORIES property +# of the imported targets. It is hence not necessary to add this path +# manually to the include search path for targets which link to gflags. 
+set (GFLAGS_INCLUDE_DIR "${_INSTALL_PREFIX}/include") + +if (gflags_FIND_COMPONENTS) + foreach (gflags_FIND_COMPONENT IN LISTS gflags_FIND_COMPONENTS) + if (gflags_FIND_REQUIRED_${gflags_FIND_COMPONENT} AND NOT TARGET gflags_${gflags_FIND_COMPONENT}) + message (FATAL_ERROR "Package gflags was installed without required component ${gflags_FIND_COMPONENT}!") + endif () + endforeach () + list (GET gflags_FIND_COMPONENTS 0 gflags_FIND_COMPONENT) +else () + set (gflags_FIND_COMPONENT) +endif () + +# default settings of GFLAGS_SHARED and GFLAGS_NOTHREADS +# +# It is recommended to use either one of the following find_package commands +# instead of setting the GFLAGS_(SHARED|NOTHREADS) variables: +# - find_package(gflags REQUIRED) +# - find_package(gflags COMPONENTS nothreads_static) +# - find_package(gflags COMPONENTS nothreads_shared) +# - find_package(gflags COMPONENTS static) +# - find_package(gflags COMPONENTS shared) +if (NOT DEFINED GFLAGS_SHARED) + if (DEFINED gflags_SHARED) + set (GFLAGS_SHARED ${gflags_SHARED}) + elseif (gflags_FIND_COMPONENT) + if (gflags_FIND_COMPONENT MATCHES "shared") + set (GFLAGS_SHARED TRUE) + else () + set (GFLAGS_SHARED FALSE) + endif () + elseif (TARGET gflags_shared OR TARGET gflags_nothreads_shared) + set (GFLAGS_SHARED TRUE) + else () + set (GFLAGS_SHARED FALSE) + endif () +endif () +if (NOT DEFINED GFLAGS_NOTHREADS) + if (DEFINED gflags_NOTHREADS) + set (GFLAGS_NOTHREADS ${gflags_NOTHREADS}) + elseif (gflags_FIND_COMPONENT) + if (gflags_FIND_COMPONENT MATCHES "nothreads") + set (GFLAGS_NOTHREADS TRUE) + else () + set (GFLAGS_NOTHREADS FALSE) + endif () + elseif (TARGET gflags_static OR TARGET gflags_shared) + set (GFLAGS_NOTHREADS FALSE) + else () + set (GFLAGS_NOTHREADS TRUE) + endif () +endif () + +# choose imported library target +if (NOT GFLAGS_TARGET) + if (gflags_TARGET) + set (GFLAGS_TARGET ${gflags_TARGET}) + elseif (GFLAGS_SHARED) + if (GFLAGS_NOTHREADS) + set (GFLAGS_TARGET gflags_nothreads_shared) + else () + set 
(GFLAGS_TARGET gflags_shared) + endif () + else () + if (GFLAGS_NOTHREADS) + set (GFLAGS_TARGET gflags_nothreads_static) + else () + set (GFLAGS_TARGET gflags_static) + endif () + endif () +endif () +if (NOT TARGET ${GFLAGS_TARGET}) + message (FATAL_ERROR "Your gflags installation does not contain a ${GFLAGS_TARGET} library target!" + " Try a different combination of GFLAGS_SHARED and GFLAGS_NOTHREADS.") +endif () + +# add more convenient "gflags" import target +if (NOT TARGET gflags) + if (GFLAGS_SHARED) + add_library (gflags SHARED IMPORTED) + else () + add_library (gflags STATIC IMPORTED) + endif () + # copy INTERFACE_* properties + foreach (_GFLAGS_PROPERTY_NAME IN ITEMS + COMPILE_DEFINITIONS + COMPILE_FEATURES + COMPILE_OPTIONS + INCLUDE_DIRECTORIES + LINK_LIBRARIES + POSITION_INDEPENDENT_CODE + ) + get_target_property (_GFLAGS_PROPERTY_VALUE ${GFLAGS_TARGET} INTERFACE_${_GFLAGS_PROPERTY_NAME}) + if (_GFLAGS_PROPERTY_VALUE) + set_target_properties(gflags PROPERTIES + INTERFACE_${_GFLAGS_PROPERTY_NAME} "${_GFLAGS_PROPERTY_VALUE}" + ) + endif () + endforeach () + # copy IMPORTED_*_ properties + get_target_property (_GFLAGS_CONFIGURATIONS ${GFLAGS_TARGET} IMPORTED_CONFIGURATIONS) + set_target_properties (gflags PROPERTIES IMPORTED_CONFIGURATIONS "${_GFLAGS_CONFIGURATIONS}") + foreach (_GFLAGS_PROPERTY_NAME IN ITEMS + IMPLIB + LOCATION + LINK_DEPENDENT_LIBRARIES + LINK_INTERFACE_LIBRARIES + LINK_INTERFACE_LANGUAGES + LINK_INTERFACE_MULTIPLICITY + NO_SONAME + SONAME + ) + foreach (_GFLAGS_CONFIG IN LISTS _GFLAGS_CONFIGURATIONS) + get_target_property (_GFLAGS_PROPERTY_VALUE ${GFLAGS_TARGET} IMPORTED_${_GFLAGS_PROPERTY_NAME}_${_GFLAGS_CONFIG}) + if (_GFLAGS_PROPERTY_VALUE) + set_target_properties(gflags PROPERTIES + IMPORTED_${_GFLAGS_PROPERTY_NAME}_${_GFLAGS_CONFIG} "${_GFLAGS_PROPERTY_VALUE}" + ) + endif () + endforeach () + endforeach () + unset (_GFLAGS_CONFIGURATIONS) + unset (_GFLAGS_CONFIG) + unset (_GFLAGS_PROPERTY_NAME) + unset (_GFLAGS_PROPERTY_VALUE) 
+endif () + +# alias for default import target to be compatible with older CMake package configurations +set (GFLAGS_LIBRARIES "${GFLAGS_TARGET}") + +# set gflags_* variables for backwards compatibility +if (NOT "^gflags$" STREQUAL "^GFLAGS$") + foreach (_GFLAGS_VARIABLE IN ITEMS + VERSION_STRING + VERSION_MAJOR + VERSION_MINOR + VERSION_PATCH + INCLUDE_DIR + LIBRARIES + TARGET + ) + set (gflags_${_GFLAGS_VARIABLE} "${GFLAGS_${_GFLAGS_VARIABLE}}") + endforeach () + unset (_GFLAGS_VARIABLE) +endif () + +# unset private variables +unset (gflags_FIND_COMPONENT) +unset (_INSTALL_PREFIX) diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-targets.cmake b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-targets.cmake new file mode 100644 index 0000000..89807e4 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags-targets.cmake @@ -0,0 +1,65 @@ +# Generated by CMake + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) + message(FATAL_ERROR "CMake >= 2.6.0 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.6) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
+set(_targetsDefined) +set(_targetsNotDefined) +set(_expectedTargets) +foreach(_expectedTarget gflags_nothreads_static) + list(APPEND _expectedTargets ${_expectedTarget}) + if(NOT TARGET ${_expectedTarget}) + list(APPEND _targetsNotDefined ${_expectedTarget}) + endif() + if(TARGET ${_expectedTarget}) + list(APPEND _targetsDefined ${_expectedTarget}) + endif() +endforeach() +if("${_targetsDefined}" STREQUAL "${_expectedTargets}") + unset(_targetsDefined) + unset(_targetsNotDefined) + unset(_expectedTargets) + set(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT "${_targetsDefined}" STREQUAL "") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") +endif() +unset(_targetsDefined) +unset(_targetsNotDefined) +unset(_expectedTargets) + + +# Create imported target gflags_nothreads_static +add_library(gflags_nothreads_static STATIC IMPORTED) + +set_target_properties(gflags_nothreads_static PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "GFLAGS_IS_A_DLL=0" + INTERFACE_INCLUDE_DIRECTORIES "/home/dm/project/SDK/TNN_lib/TNN-latest/build_linux_native/third_party/gflags/include" + INTERFACE_LINK_LIBRARIES "/usr/lib/gcc/x86_64-linux-gnu/7/libgomp.so;/usr/lib/x86_64-linux-gnu/libpthread.so;/usr/lib/gcc/x86_64-linux-gnu/7/libgomp.so;/usr/lib/x86_64-linux-gnu/libpthread.so" +) + +# Import target "gflags_nothreads_static" for configuration "Release" +set_property(TARGET gflags_nothreads_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(gflags_nothreads_static PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "/home/dm/project/SDK/TNN_lib/TNN-latest/build_linux_native/third_party/gflags/libgflags_nothreads.a" + ) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. 
+
+# Commands beyond this point should not need to know the version.
+set(CMAKE_IMPORT_FILE_VERSION)
+cmake_policy(POP)
diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/gflags.pc b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags.pc
new file mode 100644
index 0000000..d4662b8
--- /dev/null
+++ b/3rdparty/TNN/build_linux_native/third_party/gflags/gflags.pc
@@ -0,0 +1,14 @@
+prefix=/usr/local
+exec_prefix=${prefix}
+bindir=${prefix}/bin
+libdir=${prefix}/lib
+includedir=${prefix}/include
+
+Name: gflags
+Version: 2.2.1
+Description: A commandline flags library that allows for distributed flags.
+URL: http://gflags.github.io/gflags
+Requires:
+Libs: -L${libdir} -lgflags
+Libs.private: -lpthread
+Cflags: -I${includedir}
diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/config.h b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/config.h
new file mode 100644
index 0000000..bf43a9a
--- /dev/null
+++ b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/config.h
@@ -0,0 +1,114 @@
+/* Generated from config.h.in during build configuration using CMake. */
+
+// Note: This header file is only used internally. It is not part of public interface!
+
+#ifndef GFLAGS_CONFIG_H_
+#define GFLAGS_CONFIG_H_
+
+
+// ---------------------------------------------------------------------------
+// System checks
+
+// Define if you build this library for a MS Windows OS.
+/* #undef OS_WINDOWS */
+
+// Define if you have the <stdint.h> header file.
+#define HAVE_STDINT_H
+
+// Define if you have the <sys/types.h> header file.
+#define HAVE_SYS_TYPES_H
+
+// Define if you have the <inttypes.h> header file.
+#define HAVE_INTTYPES_H
+
+// Define if you have the <sys/stat.h> header file.
+#define HAVE_SYS_STAT_H
+
+// Define if you have the <unistd.h> header file.
+#define HAVE_UNISTD_H
+
+// Define if you have the <fnmatch.h> header file.
+#define HAVE_FNMATCH_H
+
+// Define if you have the <shlwapi.h> header file (Windows 2000/XP).
+/* #undef HAVE_SHLWAPI_H */
+
+// Define if you have the strtoll function.
+#define HAVE_STRTOLL
+
+// Define if you have the strtoq function.
+/* #undef HAVE_STRTOQ */
+
+// Define if you have the <pthread.h> header file.
+/* #undef HAVE_PTHREAD */
+
+// Define if your pthread library defines the type pthread_rwlock_t
+/* #undef HAVE_RWLOCK */
+
+// gcc requires this to get PRId64, etc.
+#if defined(HAVE_INTTYPES_H) && !defined(__STDC_FORMAT_MACROS)
+# define __STDC_FORMAT_MACROS 1
+#endif
+
+// ---------------------------------------------------------------------------
+// Package information
+
+// Name of package.
+#define PACKAGE gflags
+
+// Define to the full name of this package.
+#define PACKAGE_NAME gflags
+
+// Define to the full name and version of this package.
+#define PACKAGE_STRING gflags 2.2.1
+
+// Define to the one symbol short name of this package.
+#define PACKAGE_TARNAME gflags-2.2.1
+
+// Define to the version of this package.
+#define PACKAGE_VERSION 2.2.1
+
+// Version number of package.
+#define VERSION PACKAGE_VERSION
+
+// Define to the address where bug reports for this package should be sent.
+#define PACKAGE_BUGREPORT https://github.com/gflags/gflags/issues
+
+// ---------------------------------------------------------------------------
+// Path separator
+#ifndef PATH_SEPARATOR
+# ifdef OS_WINDOWS
+# define PATH_SEPARATOR '\\'
+# else
+# define PATH_SEPARATOR '/'
+# endif
+#endif
+
+// ---------------------------------------------------------------------------
+// Windows
+
+// Always export symbols when compiling a shared library as this file is only
+// included by internal modules when building the gflags library itself.
+// The gflags_declare.h header file will set it to import these symbols otherwise.
+#ifndef GFLAGS_DLL_DECL +# if GFLAGS_IS_A_DLL && defined(_MSC_VER) +# define GFLAGS_DLL_DECL __declspec(dllexport) +# else +# define GFLAGS_DLL_DECL +# endif +#endif +// Flags defined by the gflags library itself must be exported +#ifndef GFLAGS_DLL_DEFINE_FLAG +# define GFLAGS_DLL_DEFINE_FLAG GFLAGS_DLL_DECL +#endif + +#ifdef OS_WINDOWS +// The unittests import the symbols of the shared gflags library +# if GFLAGS_IS_A_DLL && defined(_MSC_VER) +# define GFLAGS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) +# endif +# include "windows_port.h" +#endif + + +#endif // GFLAGS_CONFIG_H_ diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags.h b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags.h new file mode 100644 index 0000000..18cd369 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags.h @@ -0,0 +1,605 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Revamped and reorganized by Craig Silverstein +// +// This is the file that should be included by any file which declares +// or defines a command line flag or wants to parse command line flags +// or print a program usage message (which will include information about +// flags). Executive summary, in the form of an example foo.cc file: +// +// #include "foo.h" // foo.h has a line "DECLARE_int32(start);" +// #include "validators.h" // hypothetical file defining ValidateIsFile() +// +// DEFINE_int32(end, 1000, "The last record to read"); +// +// DEFINE_string(filename, "my_file.txt", "The file to read"); +// // Crash if the specified file does not exist. +// static bool dummy = RegisterFlagValidator(&FLAGS_filename, +// &ValidateIsFile); +// +// DECLARE_bool(verbose); // some other file has a DEFINE_bool(verbose, ...) +// +// void MyFunc() { +// if (FLAGS_verbose) printf("Records %d-%d\n", FLAGS_start, FLAGS_end); +// } +// +// Then, at the command-line: +// ./foo --noverbose --start=5 --end=100 +// +// For more details, see +// doc/gflags.html +// +// --- A note about thread-safety: +// +// We describe many functions in this routine as being thread-hostile, +// thread-compatible, or thread-safe. Here are the meanings we use: +// +// thread-safe: it is safe for multiple threads to call this routine +// (or, when referring to a class, methods of this class) +// concurrently. 
+// thread-hostile: it is not safe for multiple threads to call this
+// routine (or methods of this class) concurrently. In gflags,
+// most thread-hostile routines are intended to be called early in,
+// or even before, main() -- that is, before threads are spawned.
+// thread-compatible: it is safe for multiple threads to read from
+// this variable (when applied to variables), or to call const
+// methods of this class (when applied to classes), as long as no
+// other thread is writing to the variable or calling non-const
+// methods of this class.
+
+#ifndef GFLAGS_GFLAGS_H_
+#define GFLAGS_GFLAGS_H_
+
+#include <string>
+#include <vector>
+
+#include "gflags/gflags_declare.h" // IWYU pragma: export
+
+
+// We always want to export variables defined in user code
+#ifndef GFLAGS_DLL_DEFINE_FLAG
+# if GFLAGS_IS_A_DLL && defined(_MSC_VER)
+# define GFLAGS_DLL_DEFINE_FLAG __declspec(dllexport)
+# else
+# define GFLAGS_DLL_DEFINE_FLAG
+# endif
+#endif
+
+
+namespace GFLAGS_NAMESPACE {
+
+
+// --------------------------------------------------------------------
+// To actually define a flag in a file, use DEFINE_bool,
+// DEFINE_string, etc. at the bottom of this file. You may also find
+// it useful to register a validator with the flag. This ensures that
+// when the flag is parsed from the commandline, or is later set via
+// SetCommandLineOption, we call the validation function. It is _not_
+// called when you assign the value to the flag directly using the = operator.
+//
+// The validation function should return true if the flag value is valid, and
+// false otherwise. If the function returns false for the new setting of the
+// flag, the flag will retain its current value. If it returns false for the
+// default value, ParseCommandLineFlags() will die.
+//
+// This function is safe to call at global construct time (as in the
+// example below).
+// +// Example use: +// static bool ValidatePort(const char* flagname, int32 value) { +// if (value > 0 && value < 32768) // value is ok +// return true; +// printf("Invalid value for --%s: %d\n", flagname, (int)value); +// return false; +// } +// DEFINE_int32(port, 0, "What port to listen on"); +// static bool dummy = RegisterFlagValidator(&FLAGS_port, &ValidatePort); + +// Returns true if successfully registered, false if not (because the +// first argument doesn't point to a command-line flag, or because a +// validator is already registered for this flag). +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const bool* flag, bool (*validate_fn)(const char*, bool)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const int32* flag, bool (*validate_fn)(const char*, int32)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const uint32* flag, bool (*validate_fn)(const char*, uint32)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const int64* flag, bool (*validate_fn)(const char*, int64)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const uint64* flag, bool (*validate_fn)(const char*, uint64)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const double* flag, bool (*validate_fn)(const char*, double)); +extern GFLAGS_DLL_DECL bool RegisterFlagValidator(const std::string* flag, bool (*validate_fn)(const char*, const std::string&)); + +// Convenience macro for the registration of a flag validator +#define DEFINE_validator(name, validator) \ + static const bool name##_validator_registered = \ + GFLAGS_NAMESPACE::RegisterFlagValidator(&FLAGS_##name, validator) + + +// -------------------------------------------------------------------- +// These methods are the best way to get access to info about the +// list of commandline flags. Note that these routines are pretty slow. +// GetAllFlags: mostly-complete info about the list, sorted by file. 
+// ShowUsageWithFlags: pretty-prints the list to stdout (what --help does)
+// ShowUsageWithFlagsRestrict: limit to filenames with restrict as a substr
+//
+// In addition to accessing flags, you can also access argv[0] (the program
+// name) and argv (the entire commandline), which we sock away a copy of.
+// These variables are static, so you should only set them once.
+//
+// No need to export this data only structure from DLL, avoiding VS warning 4251.
+struct CommandLineFlagInfo {
+ std::string name; // the name of the flag
+ std::string type; // the type of the flag: int32, etc
+ std::string description; // the "help text" associated with the flag
+ std::string current_value; // the current value, as a string
+ std::string default_value; // the default value, as a string
+ std::string filename; // 'cleaned' version of filename holding the flag
+ bool has_validator_fn; // true if RegisterFlagValidator called on this flag
+ bool is_default; // true if the flag has the default value and
+ // has not been set explicitly from the cmdline
+ // or via SetCommandLineOption
+ const void* flag_ptr; // pointer to the flag's current value (i.e. FLAGS_foo)
+};
+
+// Using this inside of a validator is a recipe for a deadlock.
+// TODO(user) Fix locking when validators are running, to make it safe to
+// call validators during ParseAllFlags.
+// Also make sure then to uncomment the corresponding unit test in
+// gflags_unittest.sh
+extern GFLAGS_DLL_DECL void GetAllFlags(std::vector<CommandLineFlagInfo>* OUTPUT);
+// These two are actually defined in gflags_reporting.cc.
+extern GFLAGS_DLL_DECL void ShowUsageWithFlags(const char *argv0); // what --help does
+extern GFLAGS_DLL_DECL void ShowUsageWithFlagsRestrict(const char *argv0, const char *restrict);
+
+// Create a descriptive string for a flag.
+// Goes to some trouble to make pretty line breaks.
+extern GFLAGS_DLL_DECL std::string DescribeOneFlag(const CommandLineFlagInfo& flag);
+
+// Thread-hostile; meant to be called before any threads are spawned.
+extern GFLAGS_DLL_DECL void SetArgv(int argc, const char** argv);
+
+// The following functions are thread-safe as long as SetArgv() is
+// only called before any threads start.
+extern GFLAGS_DLL_DECL const std::vector<std::string>& GetArgvs();
+extern GFLAGS_DLL_DECL const char* GetArgv(); // all of argv as a string
+extern GFLAGS_DLL_DECL const char* GetArgv0(); // only argv0
+extern GFLAGS_DLL_DECL uint32 GetArgvSum(); // simple checksum of argv
+extern GFLAGS_DLL_DECL const char* ProgramInvocationName(); // argv0, or "UNKNOWN" if not set
+extern GFLAGS_DLL_DECL const char* ProgramInvocationShortName(); // basename(argv0)
+
+// ProgramUsage() is thread-safe as long as SetUsageMessage() is only
+// called before any threads start.
+extern GFLAGS_DLL_DECL const char* ProgramUsage(); // string set by SetUsageMessage()
+
+// VersionString() is thread-safe as long as SetVersionString() is only
+// called before any threads start.
+extern GFLAGS_DLL_DECL const char* VersionString(); // string set by SetVersionString()
+
+
+
+// --------------------------------------------------------------------
+// Normally you access commandline flags by just saying "if (FLAGS_foo)"
+// or whatever, and set them by calling "FLAGS_foo = bar" (or, more
+// commonly, via the DEFINE_foo macro). But if you need a bit more
+// control, we have programmatic ways to get/set the flags as well.
+// These programmatic ways to access flags are thread-safe, but direct
+// access is only thread-compatible.
+
+// Return true iff the flagname was found.
+// OUTPUT is set to the flag's value, or unchanged if we return false.
+extern GFLAGS_DLL_DECL bool GetCommandLineOption(const char* name, std::string* OUTPUT);
+
+// Return true iff the flagname was found. OUTPUT is set to the flag's
+// CommandLineFlagInfo or unchanged if we return false.
+extern GFLAGS_DLL_DECL bool GetCommandLineFlagInfo(const char* name, CommandLineFlagInfo* OUTPUT); + +// Return the CommandLineFlagInfo of the flagname. exit() if name not found. +// Example usage, to check if a flag's value is currently the default value: +// if (GetCommandLineFlagInfoOrDie("foo").is_default) ... +extern GFLAGS_DLL_DECL CommandLineFlagInfo GetCommandLineFlagInfoOrDie(const char* name); + +enum GFLAGS_DLL_DECL FlagSettingMode { + // update the flag's value (can call this multiple times). + SET_FLAGS_VALUE, + // update the flag's value, but *only if* it has not yet been updated + // with SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef". + SET_FLAG_IF_DEFAULT, + // set the flag's default value to this. If the flag has not yet updated + // yet (via SET_FLAGS_VALUE, SET_FLAG_IF_DEFAULT, or "FLAGS_xxx = nondef") + // change the flag's current value to the new default value as well. + SET_FLAGS_DEFAULT +}; + +// Set a particular flag ("command line option"). Returns a string +// describing the new value that the option has been set to. The +// return value API is not well-specified, so basically just depend on +// it to be empty if the setting failed for some reason -- the name is +// not a valid flag name, or the value is not a valid value -- and +// non-empty else. + +// SetCommandLineOption uses set_mode == SET_FLAGS_VALUE (the common case) +extern GFLAGS_DLL_DECL std::string SetCommandLineOption (const char* name, const char* value); +extern GFLAGS_DLL_DECL std::string SetCommandLineOptionWithMode(const char* name, const char* value, FlagSettingMode set_mode); + + +// -------------------------------------------------------------------- +// Saves the states (value, default value, whether the user has set +// the flag, registered validators, etc) of all flags, and restores +// them when the FlagSaver is destroyed. 
This is very useful in +// tests, say, when you want to let your tests change the flags, but +// make sure that they get reverted to the original states when your +// test is complete. +// +// Example usage: +// void TestFoo() { +// FlagSaver s1; +// FLAG_foo = false; +// FLAG_bar = "some value"; +// +// // test happens here. You can return at any time +// // without worrying about restoring the FLAG values. +// } +// +// Note: This class is marked with GFLAGS_ATTRIBUTE_UNUSED because all +// the work is done in the constructor and destructor, so in the standard +// usage example above, the compiler would complain that it's an +// unused variable. +// +// This class is thread-safe. However, its destructor writes to +// exactly the set of flags that have changed value during its +// lifetime, so concurrent _direct_ access to those flags +// (i.e. FLAGS_foo instead of {Get,Set}CommandLineOption()) is unsafe. + +class GFLAGS_DLL_DECL FlagSaver { + public: + FlagSaver(); + ~FlagSaver(); + + private: + class FlagSaverImpl* impl_; // we use pimpl here to keep API steady + + FlagSaver(const FlagSaver&); // no copying! + void operator=(const FlagSaver&); +}__attribute((unused)); + +// -------------------------------------------------------------------- +// Some deprecated or hopefully-soon-to-be-deprecated functions. + +// This is often used for logging. TODO(csilvers): figure out a better way +extern GFLAGS_DLL_DECL std::string CommandlineFlagsIntoString(); +// Usually where this is used, a FlagSaver should be used instead. +extern GFLAGS_DLL_DECL +bool ReadFlagsFromString(const std::string& flagfilecontents, + const char* prog_name, + bool errors_are_fatal); // uses SET_FLAGS_VALUE + +// These let you manually implement --flagfile functionality. +// DEPRECATED. 
+extern GFLAGS_DLL_DECL bool AppendFlagsIntoFile(const std::string& filename, const char* prog_name); +extern GFLAGS_DLL_DECL bool ReadFromFlagsFile(const std::string& filename, const char* prog_name, bool errors_are_fatal); // uses SET_FLAGS_VALUE + + +// -------------------------------------------------------------------- +// Useful routines for initializing flags from the environment. +// In each case, if 'varname' does not exist in the environment +// return defval. If 'varname' does exist but is not valid +// (e.g., not a number for an int32 flag), abort with an error. +// Otherwise, return the value. NOTE: for booleans, for true use +// 't' or 'T' or 'true' or '1', for false 'f' or 'F' or 'false' or '0'. + +extern GFLAGS_DLL_DECL bool BoolFromEnv(const char *varname, bool defval); +extern GFLAGS_DLL_DECL int32 Int32FromEnv(const char *varname, int32 defval); +extern GFLAGS_DLL_DECL uint32 Uint32FromEnv(const char *varname, uint32 defval); +extern GFLAGS_DLL_DECL int64 Int64FromEnv(const char *varname, int64 defval); +extern GFLAGS_DLL_DECL uint64 Uint64FromEnv(const char *varname, uint64 defval); +extern GFLAGS_DLL_DECL double DoubleFromEnv(const char *varname, double defval); +extern GFLAGS_DLL_DECL const char *StringFromEnv(const char *varname, const char *defval); + + +// -------------------------------------------------------------------- +// The next two functions parse gflags from main(): + +// Set the "usage" message for this program. For example: +// string usage("This program does nothing. Sample usage:\n"); +// usage += argv[0] + " "; +// SetUsageMessage(usage); +// Do not include commandline flags in the usage: we do that for you! +// Thread-hostile; meant to be called before any threads are spawned. +extern GFLAGS_DLL_DECL void SetUsageMessage(const std::string& usage); + +// Sets the version string, which is emitted with --version. +// For instance: SetVersionString("1.3"); +// Thread-hostile; meant to be called before any threads are spawned. 
+extern GFLAGS_DLL_DECL void SetVersionString(const std::string& version); + + +// Looks for flags in argv and parses them. Rearranges argv to put +// flags first, or removes them entirely if remove_flags is true. +// If a flag is defined more than once in the command line or flag +// file, the last definition is used. Returns the index (into argv) +// of the first non-flag argument. +// See top-of-file for more details on this function. +#ifndef SWIG // In swig, use ParseCommandLineFlagsScript() instead. +extern GFLAGS_DLL_DECL uint32 ParseCommandLineFlags(int *argc, char*** argv, bool remove_flags); +#endif + + +// Calls to ParseCommandLineNonHelpFlags and then to +// HandleCommandLineHelpFlags can be used instead of a call to +// ParseCommandLineFlags during initialization, in order to allow for +// changing default values for some FLAGS (via +// e.g. SetCommandLineOptionWithMode calls) between the time of +// command line parsing and the time of dumping help information for +// the flags as a result of command line parsing. If a flag is +// defined more than once in the command line or flag file, the last +// definition is used. Returns the index (into argv) of the first +// non-flag argument. (If remove_flags is true, will always return 1.) +extern GFLAGS_DLL_DECL uint32 ParseCommandLineNonHelpFlags(int *argc, char*** argv, bool remove_flags); + +// This is actually defined in gflags_reporting.cc. +// This function is misnamed (it also handles --version, etc.), but +// it's too late to change that now. :-( +extern GFLAGS_DLL_DECL void HandleCommandLineHelpFlags(); // in gflags_reporting.cc + +// Allow command line reparsing. Disables the error normally +// generated when an unknown flag is found, since it may be found in a +// later parse. Thread-hostile; meant to be called before any threads +// are spawned. +extern GFLAGS_DLL_DECL void AllowCommandLineReparsing(); + +// Reparse the flags that have not yet been recognized. 
Only flags +// registered since the last parse will be recognized. Any flag value +// must be provided as part of the argument using "=", not as a +// separate command line argument that follows the flag argument. +// Intended for handling flags from dynamically loaded libraries, +// since their flags are not registered until they are loaded. +extern GFLAGS_DLL_DECL void ReparseCommandLineNonHelpFlags(); + +// Clean up memory allocated by flags. This is only needed to reduce +// the quantity of "potentially leaked" reports emitted by memory +// debugging tools such as valgrind. It is not required for normal +// operation, or for the google perftools heap-checker. It must only +// be called when the process is about to exit, and all threads that +// might access flags are quiescent. Referencing flags after this is +// called will have unexpected consequences. This is not safe to run +// when multiple threads might be running: the function is +// thread-hostile. +extern GFLAGS_DLL_DECL void ShutDownCommandLineFlags(); + + +// -------------------------------------------------------------------- +// Now come the command line flag declaration/definition macros that +// will actually be used. They're kind of hairy. A major reason +// for this is initialization: we want people to be able to access +// variables in global constructors and have that not crash, even if +// their global constructor runs before the global constructor here. +// (Obviously, we can't guarantee the flags will have the correct +// default value in that case, but at least accessing them is safe.) +// The only way to do that is have flags point to a static buffer. +// So we make one, using a union to ensure proper alignment, and +// then use placement-new to actually set up the flag with the +// correct default value. In the same vein, we have to worry about +// flag access in global destructors, so FlagRegisterer has to be +// careful never to destroy the flag-values it constructs. 
+//
+// Note that when we define a flag variable FLAGS_<name>, we also
+// preemptively define a junk variable, FLAGS_no<name>. This is to
+// cause a link-time error if someone tries to define 2 flags with
+// names like "logging" and "nologging". We do this because a bool
+// flag FLAG can be set from the command line to true with a "-FLAG"
+// argument, and to false with a "-noFLAG" argument, and so this can
+// potentially avert confusion.
+//
+// We also put flags into their own namespace. It is purposefully
+// named in an opaque way that people should have trouble typing
+// directly. The idea is that DEFINE puts the flag in the weird
+// namespace, and DECLARE imports the flag from there into the current
+// namespace. The net result is to force people to use DECLARE to get
+// access to a flag, rather than saying "extern GFLAGS_DLL_DECL bool FLAGS_whatever;"
+// or some such instead. We want this so we can put extra
+// functionality (like sanity-checking) in DECLARE if we want, and
+// make sure it is picked up everywhere.
+//
+// We also put the type of the variable in the namespace, so that
+// people can't DECLARE_int32 something that they DEFINE_bool'd
+// elsewhere.
+
+class GFLAGS_DLL_DECL FlagRegisterer {
+ public:
+ // We instantiate this template ctor for all supported types,
+ // so it is possible to place implementation of the FlagRegisterer ctor in
+ // .cc file.
+ // Calling this constructor with unsupported type will produce linker error.
+ template <typename FlagType>
+ FlagRegisterer(const char* name,
+ const char* help, const char* filename,
+ FlagType* current_storage, FlagType* defvalue_storage);
+};
+
+// If your application #defines STRIP_FLAG_HELP to a non-zero value
+// before #including this file, we remove the help message from the
+// binary file. This can reduce the size of the resulting binary
+// somewhat, and may also be useful for security reasons.
+ +extern GFLAGS_DLL_DECL const char kStrippedFlagHelp[]; + + +} // namespace GFLAGS_NAMESPACE + + +#ifndef SWIG // In swig, ignore the main flag declarations + +#if defined(STRIP_FLAG_HELP) && STRIP_FLAG_HELP > 0 +// Need this construct to avoid the 'defined but not used' warning. +#define MAYBE_STRIPPED_HELP(txt) \ + (false ? (txt) : GFLAGS_NAMESPACE::kStrippedFlagHelp) +#else +#define MAYBE_STRIPPED_HELP(txt) txt +#endif + +// Each command-line flag has two variables associated with it: one +// with the current value, and one with the default value. However, +// we have a third variable, which is where value is assigned; it's a +// constant. This guarantees that FLAG_##value is initialized at +// static initialization time (e.g. before program-start) rather than +// than global construction time (which is after program-start but +// before main), at least when 'value' is a compile-time constant. We +// use a small trick for the "default value" variable, and call it +// FLAGS_no. This serves the second purpose of assuring a +// compile error if someone tries to define a flag named no +// which is illegal (--foo and --nofoo both affect the "foo" flag). +#define DEFINE_VARIABLE(type, shorttype, name, value, help) \ + namespace fL##shorttype { \ + static const type FLAGS_nono##name = value; \ + /* We always want to export defined variables, dll or no */ \ + GFLAGS_DLL_DEFINE_FLAG type FLAGS_##name = FLAGS_nono##name; \ + type FLAGS_no##name = FLAGS_nono##name; \ + static GFLAGS_NAMESPACE::FlagRegisterer o_##name( \ + #name, MAYBE_STRIPPED_HELP(help), __FILE__, \ + &FLAGS_##name, &FLAGS_no##name); \ + } \ + using fL##shorttype::FLAGS_##name + +// For DEFINE_bool, we want to do the extra check that the passed-in +// value is actually a bool, and not a string or something that can be +// coerced to a bool. These declarations (no definition needed!) will +// help us do that, and never evaluate From, which is important. +// We'll use 'sizeof(IsBool(val))' to distinguish. 
This code requires +// that the compiler have different sizes for bool & double. Since +// this is not guaranteed by the standard, we check it with a +// COMPILE_ASSERT. +namespace fLB { +struct CompileAssert {}; +typedef CompileAssert expected_sizeof_double_neq_sizeof_bool[ + (sizeof(double) != sizeof(bool)) ? 1 : -1]; +template double GFLAGS_DLL_DECL IsBoolFlag(const From& from); +GFLAGS_DLL_DECL bool IsBoolFlag(bool from); +} // namespace fLB + +// Here are the actual DEFINE_*-macros. The respective DECLARE_*-macros +// are in a separate include, gflags_declare.h, for reducing +// the physical transitive size for DECLARE use. +#define DEFINE_bool(name, val, txt) \ + namespace fLB { \ + typedef ::fLB::CompileAssert FLAG_##name##_value_is_not_a_bool[ \ + (sizeof(::fLB::IsBoolFlag(val)) != sizeof(double))? 1: -1]; \ + } \ + DEFINE_VARIABLE(bool, B, name, val, txt) + +#define DEFINE_int32(name, val, txt) \ + DEFINE_VARIABLE(GFLAGS_NAMESPACE::int32, I, \ + name, val, txt) + +#define DEFINE_uint32(name,val, txt) \ + DEFINE_VARIABLE(GFLAGS_NAMESPACE::uint32, U, \ + name, val, txt) + +#define DEFINE_int64(name, val, txt) \ + DEFINE_VARIABLE(GFLAGS_NAMESPACE::int64, I64, \ + name, val, txt) + +#define DEFINE_uint64(name,val, txt) \ + DEFINE_VARIABLE(GFLAGS_NAMESPACE::uint64, U64, \ + name, val, txt) + +#define DEFINE_double(name, val, txt) \ + DEFINE_VARIABLE(double, D, name, val, txt) + +// Strings are trickier, because they're not a POD, so we can't +// construct them at static-initialization time (instead they get +// constructed at global-constructor time, which is much later). To +// try to avoid crashes in that case, we use a char buffer to store +// the string, which we can static-initialize, and then placement-new +// into it later. It's not perfect, but the best we can do. 
+ +namespace fLS { + +inline clstring* dont_pass0toDEFINE_string(char *stringspot, + const char *value) { + return new(stringspot) clstring(value); +} +inline clstring* dont_pass0toDEFINE_string(char *stringspot, + const clstring &value) { + return new(stringspot) clstring(value); +} +inline clstring* dont_pass0toDEFINE_string(char *stringspot, + int value); + +// Auxiliary class used to explicitly call destructor of string objects +// allocated using placement new during static program deinitialization. +// The destructor MUST be an inline function such that the explicit +// destruction occurs in the same compilation unit as the placement new. +class StringFlagDestructor { + void *current_storage_; + void *defvalue_storage_; + +public: + + StringFlagDestructor(void *current, void *defvalue) + : current_storage_(current), defvalue_storage_(defvalue) {} + + ~StringFlagDestructor() { + reinterpret_cast(current_storage_ )->~clstring(); + reinterpret_cast(defvalue_storage_)->~clstring(); + } +}; + +} // namespace fLS + +// We need to define a var named FLAGS_no##name so people don't define +// --string and --nostring. And we need a temporary place to put val +// so we don't have to evaluate it twice. Two great needs that go +// great together! +// The weird 'using' + 'extern' inside the fLS namespace is to work around +// an unknown compiler bug/issue with the gcc 4.2.1 on SUSE 10. 
See +// http://code.google.com/p/google-gflags/issues/detail?id=20 +#define DEFINE_string(name, val, txt) \ + namespace fLS { \ + using ::fLS::clstring; \ + using ::fLS::StringFlagDestructor; \ + static union { void* align; char s[sizeof(clstring)]; } s_##name[2]; \ + clstring* const FLAGS_no##name = ::fLS:: \ + dont_pass0toDEFINE_string(s_##name[0].s, \ + val); \ + static GFLAGS_NAMESPACE::FlagRegisterer o_##name( \ + #name, MAYBE_STRIPPED_HELP(txt), __FILE__, \ + FLAGS_no##name, new (s_##name[1].s) clstring(*FLAGS_no##name)); \ + static StringFlagDestructor d_##name(s_##name[0].s, s_##name[1].s); \ + extern GFLAGS_DLL_DEFINE_FLAG clstring& FLAGS_##name; \ + using fLS::FLAGS_##name; \ + clstring& FLAGS_##name = *FLAGS_no##name; \ + } \ + using fLS::FLAGS_##name + +#endif // SWIG + + + + + +#endif // GFLAGS_GFLAGS_H_ diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_completions.h b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_completions.h new file mode 100644 index 0000000..2fa0db6 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_completions.h @@ -0,0 +1,121 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- + +// +// Implement helpful bash-style command line flag completions +// +// ** Functional API: +// HandleCommandLineCompletions() should be called early during +// program startup, but after command line flag code has been +// initialized, such as the beginning of HandleCommandLineHelpFlags(). +// It checks the value of the flag --tab_completion_word. If this +// flag is empty, nothing happens here. If it contains a string, +// however, then HandleCommandLineCompletions() will hijack the +// process, attempting to identify the intention behind this +// completion. Regardless of the outcome of this deduction, the +// process will be terminated, similar to --helpshort flag +// handling. +// +// ** Overview of Bash completions: +// Bash can be told to programatically determine completions for the +// current 'cursor word'. It does this by (in this case) invoking a +// command with some additional arguments identifying the command +// being executed, the word being completed, and the previous word +// (if any). Bash then expects a sequence of output lines to be +// printed to stdout. 
If these lines all contain a common prefix +// longer than the cursor word, bash will replace the cursor word +// with that common prefix, and display nothing. If there isn't such +// a common prefix, bash will display the lines in pages using 'more'. +// +// ** Strategy taken for command line completions: +// If we can deduce either the exact flag intended, or a common flag +// prefix, we'll output exactly that. Otherwise, if information +// must be displayed to the user, we'll take the opportunity to add +// some helpful information beyond just the flag name (specifically, +// we'll include the default flag value and as much of the flag's +// description as can fit on a single terminal line width, as specified +// by the flag --tab_completion_columns). Furthermore, we'll try to +// make bash order the output such that the most useful or relevent +// flags are the most likely to be shown at the top. +// +// ** Additional features: +// To assist in finding that one really useful flag, substring matching +// was implemented. Before pressing a to get completion for the +// current word, you can append one or more '?' to the flag to do +// substring matching. Here's the semantics: +// --foo Show me all flags with names prefixed by 'foo' +// --foo? Show me all flags with 'foo' somewhere in the name +// --foo?? Same as prior case, but also search in module +// definition path for 'foo' +// --foo??? Same as prior case, but also search in flag +// descriptions for 'foo' +// Finally, we'll trim the output to a relatively small number of +// flags to keep bash quiet about the verbosity of output. If one +// really wanted to see all possible matches, appending a '+' to the +// search word will force the exhaustive list of matches to be printed. +// +// ** How to have bash accept completions from a binary: +// Bash requires that it be informed about each command that programmatic +// completion should be enabled for. 
Example addition to a .bashrc +// file would be (your path to gflags_completions.sh file may differ): + +/* +$ complete -o bashdefault -o default -o nospace -C \ + '/home/build/eng/bash/bash_completions.sh --tab_completion_columns $COLUMNS' \ + time env binary_name another_binary [...] +*/ + +// This would allow the following to work: +// $ /path/to/binary_name --vmodule +// Or: +// $ ./bin/path/another_binary --gfs_u +// (etc) +// +// Sadly, it appears that bash gives no easy way to force this behavior for +// all commands. That's where the "time" in the above example comes in. +// If you haven't specifically added a command to the list of completion +// supported commands, you can still get completions by prefixing the +// entire command with "env". +// $ env /some/brand/new/binary --vmod +// Assuming that "binary" is a newly compiled binary, this should still +// produce the expected completion output. + + +#ifndef GFLAGS_COMPLETIONS_H_ +#define GFLAGS_COMPLETIONS_H_ + +namespace gflags { + +extern void HandleCommandLineCompletions(void); + +} + +#endif // GFLAGS_COMPLETIONS_H_ diff --git a/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_declare.h b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_declare.h new file mode 100644 index 0000000..2366380 --- /dev/null +++ b/3rdparty/TNN/build_linux_native/third_party/gflags/include/gflags/gflags_declare.h @@ -0,0 +1,153 @@ +// Copyright (c) 1999, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// +// Revamped and reorganized by Craig Silverstein +// +// This is the file that should be included by any file which declares +// command line flag. + +#ifndef GFLAGS_DECLARE_H_ +#define GFLAGS_DECLARE_H_ + + +// --------------------------------------------------------------------------- +// Namespace of gflags library symbols. +#define GFLAGS_NAMESPACE gflags + +// --------------------------------------------------------------------------- +// Windows DLL import/export. + +// Whether gflags library is a DLL. +// +// Set to 1 by default when the shared gflags library was built on Windows. +// Must be overwritten when this header file is used with the optionally also +// built static library instead; set by CMake's INTERFACE_COMPILE_DEFINITIONS. 
+#ifndef GFLAGS_IS_A_DLL +# define GFLAGS_IS_A_DLL 0 +#endif + +// We always want to import the symbols of the gflags library. +#ifndef GFLAGS_DLL_DECL +# if GFLAGS_IS_A_DLL && defined(_MSC_VER) +# define GFLAGS_DLL_DECL __declspec(dllimport) +# else +# define GFLAGS_DLL_DECL +# endif +#endif + +// We always want to import variables declared in user code. +#ifndef GFLAGS_DLL_DECLARE_FLAG +# if GFLAGS_IS_A_DLL && defined(_MSC_VER) +# define GFLAGS_DLL_DECLARE_FLAG __declspec(dllimport) +# else +# define GFLAGS_DLL_DECLARE_FLAG +# endif +#endif + +// --------------------------------------------------------------------------- +// Flag types +#include +#if 1 +# include // the normal place uint32_t is defined +#elif 1 +# include // the normal place u_int32_t is defined +#elif 1 +# include // a third place for uint32_t or u_int32_t +#endif + +namespace GFLAGS_NAMESPACE { + +#if 1 // C99 +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; +#elif 0 // BSD +typedef int32_t int32; +typedef u_int32_t uint32; +typedef int64_t int64; +typedef u_int64_t uint64; +#elif 0 // Windows +typedef __int32 int32; +typedef unsigned __int32 uint32; +typedef __int64 int64; +typedef unsigned __int64 uint64; +#else +# error Do not know how to define a 32-bit integer quantity on your system +#endif + +} // namespace GFLAGS_NAMESPACE + + +namespace fLS { + +// The meaning of "string" might be different between now and when the +// macros below get invoked (e.g., if someone is experimenting with +// other string implementations that get defined after this file is +// included). Save the current meaning now and use it in the macros. 
+typedef std::string clstring; + +} // namespace fLS + + +#define DECLARE_VARIABLE(type, shorttype, name) \ + /* We always want to import declared variables, dll or no */ \ + namespace fL##shorttype { extern GFLAGS_DLL_DECLARE_FLAG type FLAGS_##name; } \ + using fL##shorttype::FLAGS_##name + +#define DECLARE_bool(name) \ + DECLARE_VARIABLE(bool, B, name) + +#define DECLARE_int32(name) \ + DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int32, I, name) + +#define DECLARE_uint32(name) \ + DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint32, U, name) + +#define DECLARE_int64(name) \ + DECLARE_VARIABLE(::GFLAGS_NAMESPACE::int64, I64, name) + +#define DECLARE_uint64(name) \ + DECLARE_VARIABLE(::GFLAGS_NAMESPACE::uint64, U64, name) + +#define DECLARE_double(name) \ + DECLARE_VARIABLE(double, D, name) + +#define DECLARE_string(name) \ + /* We always want to import declared variables, dll or no */ \ + namespace fLS { \ + using ::fLS::clstring; \ + extern GFLAGS_DLL_DECLARE_FLAG ::fLS::clstring& FLAGS_##name; \ + } \ + using fLS::FLAGS_##name + + +#endif // GFLAGS_DECLARE_H_ diff --git a/3rdparty/TNN/cmake/macros.cmake b/3rdparty/TNN/cmake/macros.cmake new file mode 100644 index 0000000..65f71e5 --- /dev/null +++ b/3rdparty/TNN/cmake/macros.cmake @@ -0,0 +1,32 @@ +# The Lib Prefix +if (UNIX) + set(LIB_PFX "lib") + if (APPLE) + set(LIB_EXT ".dylib") + else () + set(LIB_EXT ".so") + endif () +else (UNIX) + set(LIB_PFX "") + set(LIB_EXT ".dll") +endif (UNIX) + +if(CMAKE_SYSTEM_NAME MATCHES "^Android") + set(SYSTEM.Android 1) +elseif(CMAKE_SYSTEM_NAME MATCHES "^Linux") + set(SYSTEM.Linux 1) +elseif(CMAKE_SYSTEM_NAME MATCHES "^Darwin") + set(SYSTEM.Darwin 1) +elseif(CMAKE_SYSTEM_NAME MATCHES "^iOS") + set(SYSTEM.iOS 1) +elseif(CMAKE_SYSTEM_NAME MATCHES "^Windows") + set(SYSTEM.Windows 1) +endif() + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set(PROCESSOR.arm 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64") + set(PROCESSOR.aarch64 1) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86") + 
set(PROCESSOR.x86 1) +endif() diff --git a/3rdparty/TNN/doc/benchmark_data.md b/3rdparty/TNN/doc/benchmark_data.md new file mode 100644 index 0000000..3e2061c --- /dev/null +++ b/3rdparty/TNN/doc/benchmark_data.md @@ -0,0 +1,119 @@ +# v0.3 benchmark + +* huawei P30 Pro(Kirin 980, Mali-G76 MP10) + +| benchmark model | cpu time(thread 1,fp16, ms) | gpu time(ms) | +|-----------------|-----------------------------|--------------| +| DenseNet 121 | 65.70 | 45.83 | +| Inception v3 | 130.98 | 67.36 | +| Inception v4 | 310.67 | 129.59 | +| MnasNet | 11.74 | 9.16 | +| MobileNet v1 | 16.39 | 11.18 | +| MobileNet v2 | 14.81 | 11.24 | +| ResNet50 v1 | 77.11 | 44.29 | +| ResNet50 v2 | 90.53 | 48.63 | +| ShuffleNet v2 | 7.66 | 10.39 | +| SqueezeNet 1.0 | 8.38 | 8.90 | +| SqueezeNet 1.1 | 8.37 | 8.66 | + +* xiaomi 6(Snapdragon 835, Adreno 540) + +| benchmark model | cpu time(thread 1,fp16, ms) | gpu time(ms) | +|-----------------|-----------------------------|--------------| +| DenseNet 121 | 349.65 | 86.81 | +| Inception v3 | 924.54 | 77.01 | +| Inception v4 | 2286.02 | 229.54 | +| MnasNet | 61.80 | 16.64 | +| MobileNet v1 | 95.46 | 12.30 | +| MobileNet v2 | 82.85 | 11.58 | +| ResNet50 v1 | 465.54 | 65.77 | +| ResNet50 v2 | 575.29 | 72.23 | +| ShuffleNet v2 | 36.93 | 22.30 | +| SqueezeNet 1.0 | 53.37 | 11.60 | +| SqueezeNet 1.1 | 53.47 | 12.18 | + +* samsung Galaxy S9+(Snapdragon 845, Adreno 630) + +| benchmark model | cpu time(thread 1,fp16, ms) | gpu time(ms) | +|-----------------|-----------------------------|--------------| +| DenseNet 121 | 128.19 | 63.65 | +| Inception v3 | 245.01 | 71.00 | +| Inception v4 | 591.45 | 145.76 | +| MnasNet | 21.86 | 9.35 | +| MobileNet v1 | 31.91 | 10.15 | +| MobileNet v2 | 28.22 | 9.89 | +| ResNet50 v1 | 152.59 | 39.94 | +| ResNet50 v2 | 177.18 | 45.34 | +| ShuffleNet v2 | 13.78 | 9.41 | +| SqueezeNet 1.0 | 15.71 | 6.58 | +| SqueezeNet 1.1 | 15.64 | 7.00 | + +* Oppo K3(Snapdragon 710, Adreno 616) + +| benchmark model | cpu time(thread 
1,fp16, ms) | gpu time(ms) | +|-----------------|-----------------------------|--------------| +| DenseNet 121 | 157.61 | 114.56 | +| Inception v3 | 299.34 | 163.22 | +| Inception v4 | 711.74 | 345.85 | +| MnasNet | 26.08 | 18.69 | +| MobileNet v1 | 39.69 | 23.10 | +| MobileNet v2 | 34.20 | 22.21 | +| ResNet50 v1 | 184.75 | 94.61 | +| ResNet50 v2 | 216.65 | 107.23 | +| ShuffleNet v2 | 16.29 | 12.90 | +| SqueezeNet 1.0 | 19.81 | 15.70 | +| SqueezeNet 1.1 | 19.74 | 15.74 | + +* Intel(R) Xeon(R) Gold 6133 CPU + +| benchmark model | cpu time(thread 1,fp32, ms) | +|-----------------|-----------------------------| +| Resnet50 | 151.00 | +| YoloV5 | 2428.00 | +| Bert-Based | 832.00 | +| Bert-Squad10 | 1093.00 | + +* TITAN Xp GPU + +| benchmark model | gpu time(fp32, ms) | +|-----------------|--------------------| +| Resnet50 | 2.22 | +| YoloV5 | 17.47 | +| Bert-Based | 8.16 | +| Bert-Squad10 | 9.60 | + + +# v0.1 benchmark + +* Kirin970: + +| model | cpu time(single thread, ms) | gpu time(ms) | +|---------------------------|--------------|--------------| +| Mobilenet_v1 | 88 | 12 | +| Mobilenet_v1_int8 | 55 | | +| Mobilenet_v2 | 58 | 11 | +| Mobilenet_v2_int8 | 41 | | +| squeezenet_v1.0 | 127 | 20 | +| squeezenet_v1.0_int8 | 82 | | + +* Snapdragon 835: + +| model | cpu time(single thread, ms) | gpu time(ms) | +|---------------------------|--------------|--------------| +| Mobilenet_v1 | 94 | 16 | +| Mobilenet_v1_int8 | 62 | | +| Mobilenet_v2 | 61 | 14 | +| Mobilenet_v2_int8 | 47 | | +| squeezenet_v1.0 | 122 | 28 | +| squeezenet_v1.0_int8 | 93 | | + +* Snapdragon 845: + +| model | cpu time(single thread, ms) | gpu time(ms) | +|---------------------------|--------------|--------------| +| Mobilenet_v1 | 60 | 10 | +| Mobilenet_v1_int8 | 37 | | +| Mobilenet_v2 | 39 | 8 | +| Mobilenet_v2_int8 | 28 | | +| squeezenet_v1.0 | 74 | 14 | +| squeezenet_v1.0_int8 | 56 | | diff --git a/3rdparty/TNN/doc/cn/development/add_op.md b/3rdparty/TNN/doc/cn/development/add_op.md new file mode 
100644 index 0000000..b2f6022 --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/add_op.md @@ -0,0 +1,182 @@ +# 新增OP + +[English Version](../../en/development/add_op_en.md) + +如果需要的算子不在[算子列表](../user/support.md)中,则需要通过以下步骤添加新的算子。 +* [添加算子解析](#1) +* [添加Layer实现](#2) +* [添加LayerAcc实现](#3) +* [添加单元测试](#4) + +## 1. 添加算子解析 +### 1.1 添加算子参数 + +* 添加LayerType +(1)修改文件 `/source/tnn/core/layer_type.h`,在`LayerType`中添加新算子的枚举,格式为`LAYER_XXX`。 +(2)修改文件 `/source/tnn/core/layer_type.cc`,在`global_layer_type_map`中添加新算子枚举值对应的算子名称,此名称与proto文件中层的名称一致。 + +* 添加LayerParam +如果新算子在proto里除了输入输出blob,还有其他参数,则需要添加LayerParam,修改文件 `/source/tnn/interpreter/layer_param.h`,添加类似`ConvLayerParam`的结构,继承于`LayerParam` + +```cpp + struct ConvLayerParam : public LayerParam { + int pad_type = -1; + // input channels of blob, divide by group + int input_channel = 0; + // the total output channels of blob, not devide by group + int output_channel = 0; + //[w_begin w_end h_begin h_end d_begin d_end] + std::vector pads; + // order [w h d] + std::vector kernels; + // order [w h d] + std::vector strides; + // order [w h d] + std::vector dialations; + int group = 1; + int bias = 0; + int activation_type = ActivationType_None; + }; +``` + +* 添加LayerResource +如果新算子有需要保存到model里的参数,则需要添加LayerResource,修改文件 `/source/tnn/interpreter/layer_resource.h`,添加类似`ConvLayerResource`的结构,继承于`LayerResource` + +```cpp + struct ConvLayerResource : public LayerResource { + // conv layer filter format + ConvLayerFilterFormat filter_format = OIHW; + + // conv layer handle + // NOTE: for deconv, the weight's default format is [n][i][o][h][w] + RawBuffer filter_handle; + + // bias handle + RawBuffer bias_handle; + + // extra scale handle for different precision + RawBuffer scale_handle; + }; +``` + +### 1.2 添加LayerInterpreter +如果新算子添加了LayerParam或者LayerResource,则需要添加对应的`LayerInterpreter`。在文件夹`/source/tnn/interpreter/tnn/layer_interpreter`下添加对应的实现。 +(1)通过`DECLARE_LAYER_INTERPRETER()`声明新算子的Interpreter; 
+(2)通过`REGISTER_LAYER_INTERPRETER()`注册新算子的Interpreter; +(3)实现以下接口: +* `InterpretProto()` -- 解析新算子的LayerParam +* `InterpretResource()` -- 解析新算子的LayerResource +* `SaveProto()` -- 保存新算子的LayerParam +* `SaveResource()` -- 保存新算子的LayerResource + +## 2. 添加Layer实现 +在文件夹 `/source/tnn/layer` 下添加对应layer的实现。 +(1)`DECLARE_LAYER()` 声明新算子的Layer实现; +(2)`REGISTER_LAYER()` 注册新算子的Layer实现; +(3)实现以下接口: +* `InferOutputDataType()` -- 设置对应层输出Blob的数据类型 +* `InferOutputShape()` -- 计算对应层输出Blob的大小 + +## 3. 添加LayerAcc实现 +每个新的算子都需要实现对应设备的LayerAcc。 +### 3.1 CPU平台 +在文件夹`/source/tnn/device/cpu/acc`下添加对应算子的LayerAcc实现。 +(1)`DECLARE_CPU_ACC()` 声明新算子的LayerAcc实现; +(2)`REGISTER_CPU_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Forward()` -- 新算子的cpu实现; + +### 3.2 ARM平台 +在文件夹`/source/tnn/device/arm/acc`下添加对应算子的LayerAcc实现。 +(1)声明新算子的LayerAcc实现,如果没有特殊的参数,可以直接使用`DECLARE_ARM_ACC()`声明; +(2)`REGISTER_ARM_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Init()` -- 对LayerParam和LayerResource进行处理; +* `Reshape()` -- 实现在输入blob大小改变的情况下的逻辑; +* `Forward()` -- 新算子的ARM实现; + +### 3.3 OpenCL平台 +在文件夹`/source/tnn/device/opencl/acc`下添加对应算子的LayerAcc实现。 +(1)声明新算子的LayerAcc实现,如果没有特殊的参数,可以直接使用`DECLARE_OPENCL_ACC()`声明; +(2)`REGISTER_OPENCL_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Init()` -- 对LayerParam和LayerResource进行处理,创建OpenCL的kernel; +* `Reshape()` -- 实现在输入blob大小改变的情况下的逻辑,对于OpenCL,在此处调用SegArgs设置kernel参数; +* `Forward()` -- 执行OpenCL的kernel; + +(4)实现OpenCL的kernel,在目录 `/source/tnn/device/opencl/cl` 添加对应的kernel文件,以.cl为后缀。添加之后需要执行脚本: + + ``` python + python opencl_codegen.py + ``` + +### 3.4 Metal平台 +在文件夹`/source/tnn/device/metal/acc`下添加对应算子的LayerAcc实现。 +(1)声明新算子的LayerAcc实现,如果没有特殊的参数,可以直接使用`DECLARE_METAL_ACC()`声明; +(2)`REGISTER_METAL_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Init()` +* `Reshape()` +* `Forward()` + +(4)实现Metal的kernel,在目录 `/source/tnn/device/metal/acc` 添加对应的metal文件,以.metal为后缀。 + +### 3.5 NPU平台 +在文件夹`/source/tnn/device/huawei_npu/convert`下添加对应算子的LayerConvert实现。 +(1)声明新算子的LayerConvert实现,如果没有其他权重input,可以直接使用`DECLARE_NPU_LAYER`声明; 
+(2)`REGISTER_NPU_LAYER` 注册新算子的LayerConvert实现; +(3)实现以下接口: +* `Convert()` -- 使用ir翻译tnn模型算子; + +### 3.6 X86平台 + +#### 3.6.1 openvino算子导入 +在文件夹`/source/tnn/network/openvino/layer_builder`下添加对应算子的OpenVINOLayerBuilder实现。 +(1)声明新算子的OpenVINOLayerBuilder实现,可以直接使用`DECLARE_OPENVINO_LAYER_BUILDER`声明; +(2)`REGISTER_OPENVINO_LAYER_BUILDER` 注册新算子的LayerConvert实现; +(3)实现以下接口: +* `Build()` -- 将tnn的算子转换成ngraph的node; + +对于openvino不支持或者性能较差的算子可以注册custom op来替代openvino的op。 +(1)在`/source/tnn/network/openvino/custom_layer`下使用`DECLARE_CUSTOM_IMPLEMENTATION`和`REGISTER_CUSTOM_IMPLEMENTATION`进行声明和注册 +(2)在`Build()`函数里使用已注册的custom op来构建ngraph的node + +#### 3.6.2 kernel编写 +在文件夹`/source/tnn/device/x86/acc`下添加对应算子的LayerAcc实现。 +(1)声明新算子的LayerAcc实现,如果没有特殊的参数,可以直接使用`DECLARE_X86_ACC()`声明; +(2)`REGISTER_X86_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Init()` -- 对LayerParam和LayerResource进行处理; +* `Reshape()` -- 实现在输入blob大小改变的情况下的逻辑; +* `Forward()` -- 新算子的X86实现; + +### 3.7 CUDA平台 + +#### 3.7.1 tensorrt算子导入 +在文件夹`/source/tnn/network/tensorrt/layer_builder`下添加对应算子的TensorRTLayerBuilder实现。 +(1)声明新算子的TensorRTLayerBuilder实现,可以直接使用`DECLARE_TENSORRT_LAYER_BUILDER`声明; +(2)`REGISTER_TENSORRT_LAYER_BUILDER` 注册新算子的TensorRTLayerBuilder实现; +(3)实现以下接口: +* `AddToNetwork()` -- 网络导入对应的tensorrt算子。 + +对于tensorrt不支持或者性能较差的算子可以注册plugin op来替代tensorrt的op。 +(1)在`/source/tnn/network/tensorrt/layer_build`下使用`DECLARE_TENSORRT_PLUGIN_LAYER_BUILDER`和`REGISTER_TENSORRT_PLUGIN_LAYER_BUILDER`进行声明和注册 +(2)实现以下接口: +* `supportsFormatCombination` -- 判断支持的数据类型和数据排布; +* `getPluginType` -- 自定义plugin type; +* `getOutputDataType` -- 设定输出data type; +* `AddToNetwork` -- 实现插件导入网络; +* `getOutputDimensions` -- 返回输出尺寸计算公式 +* `getPluginName` -- 自定义plugin name + +#### 3.7.2 kernel编写 +在文件夹`/source/tnn/device/cuda/acc`下添加对应算子的LayerAcc实现。 +(1)声明新算子的LayerAcc实现,如果没有特殊的参数,可以直接使用`DECLARE_CUDA_ACC()`声明; +(2)`REGISTER_CUDA_ACC()` 注册新算子的LayerAcc实现; +(3)实现以下接口: +* `Init()` -- 对LayerParam和LayerResource进行处理; +* `Reshape()` -- 实现在输入blob大小改变的情况下的逻辑; +* `Forward()` -- 
新算子的CUDA实现; + +## 4. 添加单元测试 +在文件夹 `/test/unit_test/layer_test` 下添加对应层的单元测试文件。 diff --git a/3rdparty/TNN/doc/cn/development/architecture.md b/3rdparty/TNN/doc/cn/development/architecture.md new file mode 100644 index 0000000..2dbd012 --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/architecture.md @@ -0,0 +1,147 @@ +# 架构详解 + +[English Version](../../en/development/architecture_en.md) + +## 一、API设计 +考虑开源库后期维护及版本兼容性,所有对外暴露接口均通过include目录统一管理。具体API相关介绍可参见[API文档](../user/api.md) + + +## 二、模型解析 + +对模型解析相关接口进行了抽象,可支持多种模型格式解析和扩充,相关代码见source/tnn/interpreter模块。 + +
+ +AbstractModelInterpreter定义了抽象的Interpret接口,不同的模型解析器解析不同类型模型。DefaultModelInterpreter相关的接口将相关结果存入NetStruture和NetResource结构中,部分第三方模型无法完成内部结构解析的有单独适配,如CoreMLModelInterpreter,以完成第三方库适配。 + +不同的模型解析器均有对应的creator + +```cpp +// @brief ModelInterpreterCreator define model interpreter creator interface +class ModelInterpreterCreator { +public: + virtual ~ModelInterpreterCreator() {}; + virtual AbstractModelInterpreter* CreateModelInterpreter() = 0; +}; + +// @brief TypeModelInterpreterCreator create different type model interpreter +template +class TypeModelInterpreterCreator:public ModelInterpreterCreator { + virtual AbstractModelInterpreter* CreateModelInterpreter() { + return new T(); + } +}; +``` + +不同的model interpreter creator均通过Register进行注册。 + +```cpp +//@brief TypeModelInterpreterRegister register TypeModelInterpreterCreator +template +class TypeModelInterpreterRegister { +public: + TypeModelInterpreterRegister(ModelType type) { + GetGlobalModelInterpreterCreatorMap()[type] = std::shared_ptr(new T()); + } +}; + +``` + +以TNN模型解析注册为例: TypeModelInterpreterRegister\> g\_tnn\_model\_interpreter\_register(MODEL\_TYPE\_TNN); + +通过TypeModelInterpreterRegister构造函数,可将TNN对应的TypeModelInterpreterCreator\注册到全局model interpreter creator map中,后续通过model type即可获取对应creator并构建对应的model interpreter。 + + +## 三、网络构建 + +网络构建主要包含两大部分,第一部分为网络Layer构建,第二部分为Blob节点构建。 + + +```cpp + +//@brief BaseLaye define the layer interface +class BaseLayer { +public: + + ... + + virtual Status Init(Context* context, LayerParam* param, + LayerResource* resource, std::vector& inputs, + std::vector& outputs, + AbstractDevice* device); + + ... +}; + +``` + +与前面模型注册机制类似,不同Layer会注册不同的Layer Creator。通过Layer Type获取对应的Layer Creator后即可构建出对应的Layer,Layer构建完成后可计算对应输出blob尺寸以及创建平台加速算子。 + +Blob节点构建核心在于内存的分配和优化,主要分为blob内存循环复用,blob内存拼接与监控。 + +
+ +首先不同layer输出blob间内存会通过内部算法实现循环复用,不同blob间内存复用会优先选择尺寸接近的blob。 + +确定blob内存复用关系后,会对blob内存进行拼接,并统一分配内存,最终同一Instance不同blob间持有相同的base指针以及不同的偏移量,同一线程多个instance间以及不同线程instance间内存有了内存复用的基础。TNN内部提供了单一线程内不同instance间内存复用自动实现机制,通过SHARE\_MEMORY\_MODE\_SHARE\_ONE\_THREAD构建的Instance会自动实现多Instance内存复用。同时SHARE\_MEMORY\_MODE\_SET\_FROM\_EXTERNAL构建的Instance支持内存外部传入,由调用者维护内存复用关系以及内存分配释放,对于多线程复用还需要处理线程间加锁机制。 + +## 四、多平台加速算子实现 + +
+ +抽象AbstractDevice接口,用于隐藏不同Device实现细节。提供Device Memory 尺寸计算,Device Memory分配释放,内存CPU Memory与Device meomoy拷贝,Device Layer加速算子构建,以及Instance对应Device Context构建等接口。 + +```cpp +// @brief AbstractDevice define create memory, context and layer acc interface. +class AbstractDevice { +public: + ... + virtual BlobMemorySizeInfo Calculate(BlobDesc& desc) = 0; + ... + virtual Status Allocate(void** handle, MatType mat_type, DimsVector dims) = 0; + ... + virtual Status Allocate(void** handle, BlobMemorySizeInfo& size_info) = 0; + ... + virtual Status Free(void* handle) = 0; + ... + virtual Status CopyToDevice(BlobHandle* dst, const BlobHandle* src, + BlobDesc& desc, void* command_queue) = 0; + ... + virtual Status CopyFromDevice(BlobHandle* dst, const BlobHandle* src, + BlobDesc& desc, void* command_queue) = 0; + ... + virtual AbstractLayerAcc* CreateLayerAcc(LayerType type) = 0; + ... + virtual Context* CreateContext(int device_id) = 0; + ... +}; +``` + +网络构建根据配置的DeviceType可获取对应的Device实现,不同的Layer通过CreateLayerAcc接口即可构建特定平台加速算子,并通过统一的抽象基类接口AbstractLayerAcc进行交互。 + +```cpp + +// @brief AbstractLayerAcc define the layer acc interface +class AbstractLayerAcc { +public: + + ... + + virtual Status Init(Context *context, LayerParam *param, + LayerResource *resource, + const std::vector &inputs, + const std::vector &outputs) = 0; + + ... 
+ + virtual Status Forward(const std::vector &inputs, + const std::vector &outputs) = 0; +}; + +``` + +同样不同的LayerAcc通过注册机制进行注册,Layer根据LayerType即可构建不同的LayerAcc。 + +## 五、单元测试 + +TNN 单元测试基于googletest构建,当前主要对Layer Acc以及blob converter构建了单元测试。单元测试以CPU Default实现为对齐基准,以监控不同平台加速算子实现,具体单元测试相关介绍可参见[单元测试](unit_test.md) diff --git a/3rdparty/TNN/doc/cn/development/contributing.md b/3rdparty/TNN/doc/cn/development/contributing.md new file mode 100644 index 0000000..9622d1e --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/contributing.md @@ -0,0 +1,15 @@ +# 贡献代码 + +[English Version](../../en/development/contributing_en.md) +## 贡献代码 + +[添加算子](add_op.md) + +## 编程风格 + +TNN项目对于C、C++、Objective-C、Python、Shell代码风格参照 +[谷歌开源项目风格指南](https://zh-google-styleguide.readthedocs.io/en/latest/contents/) + +## 代码格式化 + +TNN项目代码格式化使用clang-format、git-clang-format, 格式化后部分缩进格式与谷歌开源项目风格有差异,以clang-format为准。 diff --git a/3rdparty/TNN/doc/cn/development/model_check.md b/3rdparty/TNN/doc/cn/development/model_check.md new file mode 100644 index 0000000..6984f4c --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/model_check.md @@ -0,0 +1,58 @@ +# 模型结果校验 + +[English Version](../../en/development/model_check_en.md) + +## 一、工具的作用 +校验对应平台(OpenCL,Metal,Cuda,ARM,HuaweiNPU)的模型输出结果是否正确。 + +## 二、编译 +编译model_check工具需要将以下宏设置为ON: +* 打开以下选项编译TNN(编译方法参照[TNN编译文档](../user/compile.md)) +* `TNN_CPU_ENABLE` +* `TNN_MODEL_CHECK_ENABLE` +* 对应device的宏,如`TNN_OPENCL_ENABLE`, `TNN_ARM_ENABLE` + +## 三、校验工具使用 +### 1. 命令 +``` +./model_check [-h] [-p] [-m] [-d] [-i] [-f] [-e] [-n] [-s] [-o] [-b] +``` +### 2. 参数说明 + +|命令参数 |是否必须|带参数 |参数说明 | +|:------------------|:------:|:-----:|:-------------------------------------------| +|-h, --help | | |输出命令提示。 | +|-p, --proto |√ |√|指定tnnproto模型描述文件。 | +|-m, --model |√ |√|指定tnnmodel模型参数文件。 | +|-d, --device |√ |√|指定模型执行的平台,如OPENCL,ARM,METAL,CUDA,HUAWEI_NPU等。 | +|-i, --input | |√|指定输入文件。目前支持格式为:
• 文本文件(文件后缀为.txt), 格式与模型转换工具导出的输入格式一致。
• 常用图片格式文件(文件后缀为 .jpg .jpeg .png .bmp)
如果不指定,则会使用 (-1, 1) 随机输入| +|-f, --ref | |√|采用指定输出进行结果对比。目前支持格式为:
• 文本文件(文件后缀为.txt),格式与模型转换工具导出的输出格式一致。| +|-e, --end | | |仅校验模型的最终输出。 | +|-n, --bias | |√|预处理,仅对输入为图片时有效。对输入数据各通道进行bias操作,参数格式为:0.0,0.0,0.0| +|-s, --scale | |√|预处理,仅对输入为图片时有效。对输入数据各通道进行scale操作,参数格式为:1.0,1.0,1.0| +|-o, --output | | |是否保存最终的输出。 | +|-b, --batch | | |验证多batch情况下,每个batch结果是否正确。(还未开发完成) | + +## 四、执行脚本 +### 1. Android +#### 1.1 模型准备 +将待校验的模型的tnnproto和tnnmodel文件拷贝进`/platforms/android/models`,并改名为`test.tnnproto`和`test.tnnmodel` +#### 1.2 执行脚本 +``` +cd /platforms/android/ +./model_check_android.sh -c -m -p +``` +### 2. Linux +#### 2.1. 编译脚本 +``` +cd /platforms/linux/ +./build_model_check.sh -c +``` +#### 2.2. 执行命令 +``` +/platforms/linux/build/model_check -p -m -d +``` + +## 五、工具限制 +* 目前只支持fp32的模型校验; +* 对于逐层校验,只针对fp32精度下的结果进行校验;对于最后结果校验,使用Auto精度进行校验。 diff --git a/3rdparty/TNN/doc/cn/development/profiling.md b/3rdparty/TNN/doc/cn/development/profiling.md new file mode 100644 index 0000000..0642331 --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/profiling.md @@ -0,0 +1,126 @@ +# 模型性能分析 + +[English Version](../../en/development/profiling_en.md) + +分析模型耗时情况 + +## 一、iOS平台耗时测试 +### 测试步骤 +1. 添加测试模型 + + 在`/model`目录下添加测试模型,每个模型一个文件夹,文件夹中包含以proto和model结尾的模型文件。目前工程中已有模型squeezenetv1.1 + +2. 打开benchmark工程 + + 进入目录`/benchmark/benchmark_ios`,双击打开benchmark工程 + +3. 设置开发者账号 + + 如下图点击benchmark工程,找到工程设置`Signing & Capabilities`,点击Team选项卡选择`Add an Account...` + +
+ + 在如下界面输入Apple ID账号和密码,添加完成后回到`Signing & Capabilities`界面,并在Team选项卡中选中添加的账号。如果没有Apple ID也可以通过`Create Apple ID`选项根据相关提示进行申请。 + + `PS:申请Apple ID无需付费,可以即时通过,通过后才可在真机上运行APP调试` + +
+ + +4. 真机运行 + + 4.1 修改`Bundle Identitifier` + + 如图在现有`Bundle Identifier`后随机添加后缀(限数字和字母),避免个人账户遇到签名冲突。 + +
+ + 4.2 验证授权 + + 首次运行先利用快捷键`Command + Shift + K`对工程进行清理,再执行快捷键`Command + R`运行。如果是首次登陆Apple ID,Xcode会弹框报如下错误,需要在iOS设备上根据提示进行授权验证。一般来说手机上的授权路径为:设置 -> 通用 -> 描述文件与设备管理 -> Apple Development选项 -> 点击信任 + +
+ + 4.3 运行结果 + + 首次运行先利用快捷键`Command + Shift + K`对工程进行清理,再执行快捷键`Command + R`运行。在界面上点击Run按钮,界面会显示model目录下所有模型的CPU和GPU耗时情况。iPhone7真机运行结果如下图。 + +
+ + PS: + + a) 由于GPU和CPU加速原理不同,具体模型的GPU性能不一定比CPU高,与具体机型、模型结构以及工程实现有关。欢迎大家参与到TNN开发中,共同进步。 + + b) 如遇到`Unable to install...`错误提示,请在真机设备上删除已有的benchmark app,重新运行安装。 + + c) 真机运行时,如果遇到CodeSign错误`Command CodeSign failed with a nonzero exit code`,可参看issue20 `iOS Demo运行步骤说明` + +## 二、Android平台耗时测试 +### 1. 环境搭建 +#### 1.1 编译环境 +参考[TNN编译文档](../user/compile.md) 中Android库编译,检查环境是否满足要求。 + +#### 1.2 执行环境 +* adb命令配置 +下载[安卓SDK工具](https://developer.android.com/studio/releases/platform-tools),将`platform-tool`目录加入`$PATH`环境变量中。 +PS: 如果adb版本过低,可能执行脚本会失败。当前测试的adb版本为:29.0.5-5949299 +``` +export PATH=/platform-tools:$PATH +``` + +### 2. 添加模型 +在`/benchmark/benchmark-model`目录下,将要测试模型的tnnproto放入文件夹,例如, +``` +cd /benchmark/benchmark-model +cp mobilenet_v1.tnnproto . +``` + + +### 3. 修改脚本 +在脚本`benchmark_models.sh`中的`benchmark_model_list`变量里添加模型文件名,例如: +``` + benchmark_model_list=( + #test.tnnproto \ + mobilenet_v1.tnnproto \ # 待测试的模型文件名 +) +``` + +### 4. 执行脚本 +``` +./benchmark_models.sh [-32] [-c] [-b] [-f] [-d] [-bs] [-t] +参数说明: + -32 编译32位的库,否则为64位 + -c 删除之前的编译文件,重新编译 + -b 仅编译,不执行 + -f 打印每一层的耗时,否则是整个网络的平均耗时。 + -t 指定执行的平台。需要加上 + -bs shell运行可执行文件测试 +``` +P.S. 不指定 -t, 默认跑CPU和GPU, 华为npu benchmark需通过-t HUAWEI_NPU特殊制定. +#### 4.1 全网络性能分析: +分析整体网络耗时,执行多次,获取平均性能。 +执行脚本: +``` +./benchmark_models.sh -c +``` +结果如图: +
+ +执行结果会保存在`benchmark_models_result.txt`中。 + + +#### 4.2 逐层性能分析: +逐层性能分析工具可准备计算各层耗时,以便进行模型优化和op性能问题定位。 +执行脚本: +``` +./benchmark_models.sh -c -f +``` +结果如图: +
+ +执行结果会保存在`benchmark_models_result.txt`中。 +P.S. 华为npu不支持每层分析。 + +### 5. 特殊说明 +* 对于OpenCL平台,逐层性能分析的目的是分析kernel的耗时分布,其中为了打印每层耗时,有额外开销,只有kernel时间具有参考意义。如果要看整体实际性能,需要参考全网络性能分析。 +* Android系统相比shell执行可执行文件耗时测试,app耗时测试的性能更贴近真实安卓app执行的性能。受安卓调度策略的影响,两种方式的性能可能有明显差异。综上所述,安卓app耗时测试更为推荐。 diff --git a/3rdparty/TNN/doc/cn/development/resource/android_profiling.jpg b/3rdparty/TNN/doc/cn/development/resource/android_profiling.jpg new file mode 100644 index 0000000..64d565d Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/android_profiling.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/ios_add_account_benchmark.jpg b/3rdparty/TNN/doc/cn/development/resource/ios_add_account_benchmark.jpg new file mode 100644 index 0000000..b13c9f7 Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/ios_add_account_benchmark.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/ios_benchmark_result.jpg b/3rdparty/TNN/doc/cn/development/resource/ios_benchmark_result.jpg new file mode 100644 index 0000000..ad38168 Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/ios_benchmark_result.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/ios_set_account.jpg b/3rdparty/TNN/doc/cn/development/resource/ios_set_account.jpg new file mode 100644 index 0000000..208a96d Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/ios_set_account.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/ios_set_bundleid_benchmark.jpg b/3rdparty/TNN/doc/cn/development/resource/ios_set_bundleid_benchmark.jpg new file mode 100644 index 0000000..fde76f7 Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/ios_set_bundleid_benchmark.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/ios_verify_certificate_benchmark.jpg b/3rdparty/TNN/doc/cn/development/resource/ios_verify_certificate_benchmark.jpg new file mode 100644 index 0000000..5ec90b2 Binary files /dev/null and 
b/3rdparty/TNN/doc/cn/development/resource/ios_verify_certificate_benchmark.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/resource/opencl_profiling.jpg b/3rdparty/TNN/doc/cn/development/resource/opencl_profiling.jpg new file mode 100644 index 0000000..1e1b587 Binary files /dev/null and b/3rdparty/TNN/doc/cn/development/resource/opencl_profiling.jpg differ diff --git a/3rdparty/TNN/doc/cn/development/unit_test.md b/3rdparty/TNN/doc/cn/development/unit_test.md new file mode 100644 index 0000000..dc3a9e0 --- /dev/null +++ b/3rdparty/TNN/doc/cn/development/unit_test.md @@ -0,0 +1,49 @@ +# 单元测试 + +[English Version](../../en/development/unit_test_en.md) + +本文档主要介绍当前单元测试的目的、用法及注意事项。 + +## 单元测试用途 + +当前单元测试有两个用途: + +1. 验证各个OP在不同平台上的结果正确性。 +2. 充当OP性能测试工具,在不需要模型的情况下测试OP性能。 + +## 需了解的代码的信息 + +TNN代码中OP通过Layer这个类型来实现,但Layer类型仅仅实现了Blob Shape推理等计算无关的逻辑。不同平台的计算由layer_acc实现。 +因此,Layer单元测试中通过两个层计算,然后对比结果,以此对比结果正确性。 + +## 使用方法 + +### 编译方法 + +* 打开以下选项编译TNN(编译方法参照[TNN编译文档](../user/compile.md)) +* TNN_UNIT_TEST_ENABLE=ON +* 如果用于OP性能测试,需同时打开 TNN_BENCHMARK_ENABLE 开关: +* TNN_BENCHMARK_ENABLE=ON + +### 运行方法 + +编译成功后执行以下命令: + + ./test/unit_test/unit_test -ic 1 + +ic 参数用于控制每个单元测试重复进行的次数,通常用1即可,其他可选参数如下: + + -dt {ARM|OPENCL|METAL} // 测试的计算设备类型 + -lp ${load_library_path} // OPENCL 及 METAL 需要加载的库路径 + -th ${num_threads} // 线程数,默认为1 + -ub {0|1} // 是否打印计算性能数据(GFLOPS),用于性能测试 + +一个实际的测试例子如下: + + ./test/unit_test/unit_test -ic 1 -dt ARM -th 4 -ub 0 + + +## 注意事项 + +单元测试中通过GTEST WithParamInterface 接口生成了很多参数组合。若需更改或自定义参数,可查看 INSTANTIATE_TEST_SUITE_P 宏相关代码。 + diff --git a/3rdparty/TNN/doc/cn/faq.md b/3rdparty/TNN/doc/cn/faq.md new file mode 100644 index 0000000..69b858d --- /dev/null +++ b/3rdparty/TNN/doc/cn/faq.md @@ -0,0 +1,154 @@ +# FAQ 常见问题 + +[English Version](../en/faq_en.md) + +## 一、编译问题 + +### 编译环境要求: + general: + cmake >= 3.1 + gcc >= 4.8 + NDK >= r14b + 模型转换: + python >= 3.5 + onnxruntime>=1.1 + onnx-simplifier>=0.2.4 + protobuf >= 3.0 + +### ARMv8.2编译报错 +若要支持ARMv8.2编译,ndk版本版本至少为r18b + 
+## 二、模型转换问题 + +### 如何支持tensorflow, caffe, mxnet模型? +* 我们统一通过onnx中间格式支持各大训练框架,开源社区维护有很好的各大框架转换为onnx的工具 +* [tensorflow2onnx](https://github.com/onnx/tensorflow-onnx): typical usage: python -m tf2onnx.convert --inputs-as-nchw [输入tensor]:0 --graphdef [输入文件].pb --inputs [输入tensor]:0 --outputs [输出tensor]:0 --opset 11 --output [输出文件].onnx +* [caffe2onnx](./user/caffe2tnn.md) +* [Mxnet: export onnx model](https://mxnet.apache.org/api/python/docs/tutorials/deploy/export/onnx.html) +* [Pytorch: EXPORTING A MODEL FROM PYTORCH TO ONNX](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) + +### 模型对齐问题排查 +* [模型对齐问题排查](./model_align.md) + +## 三、运行问题 + +### 是否支持可以在PC上运行 +TNN支持在linux和windows上编译和运行 + +### 如何运行bfp16代码 +TNNTest的运行参数-pr设为LOW + +### cv::Mat如何转换成TNN::Mat +```cpp +cv::Mat cv_mat; +MatType mat_type = N8UC4; // if cv_mat.channels() == 3, then mat_type = N8UC3. +DimsVector dims = {1, cv_mat.channels(), cv_mat.rows, cv_mat.cols}; +auto tnn_mat = new TNN::Mat(DeviceType, mat_type, dims, (void *)cv_mat.ptr); +``` + +### 常见错误码介绍. +Status调用description()接口可获取更多错误信息描述。 + +0x1002(4098): 模型解析错误。检查确保ModelConfig配置的为文件内容而非文件路径。 + +0x6005(24581): 模型weights信息缺失。TNN的benchmark可以只用proto文件,是因为开启了TNN_BENCHMARK_MODE,weights自动生成,仅用来评估速度。 + +0x2000(8192): 错误信息not support model type。检查Android静态库集成链接需添加-Wl,--whole-archive tnn -Wl,--no-whole-archive,iOS库集成链接需要添加force_load。 + +0x9000(36864): device type类型不支持。(1)确保相关device type编译选项已开启 (2)Android静态库集成链接需添加-Wl,--whole-archive tnn -Wl,--no-whole-archive,iOS库集成链接需要添加force_load。 + +## 四、NPU相关问题 + +### 如何创建华为NPU编译环境? +选项1: + 在 /thrid_party/huawei_npu/ 下运行 ./download_ddk.sh 脚本下载最新版的ddk。 + + +选项2: +1. 到华为开发者联盟下载DDK[https://developer.huawei.com/consumer/cn/doc/overview/HUAWEI_HiAI] +2. 解压缩 +3. 进入到下载文件夹下的`ddk/ai_ddk_lib`目录 +4. 在`/third_party/huawei_npu/hiai_ddk_latest/`下创建`armeabi-v7a`文件夹, 并将ai_ddk_lib目录下的lib文件夹中所有文件复制到 `/third_party/huawei_npu/hiai_ddk_latest/armeabi-v7a` +5. 
在`/third_party/huawei_npu/hiai_ddk_latest/`下创建`arm64-v8a`文件夹,并将ai_ddk_lib目录下的lib64文件夹中所有文件复制到 `/third_party/huawei_npu/hiai_ddk_latest/arm64-v8a` +6. 将ai_ddk_lib目录下include`文件夹`复制到 `/third_party/huawei_npu/hiai_ddk_latest/`目录下 + +`/third_party/huawei_npu/hiai_ddk_latest/`文件结构应该如下: + +``` +hiai_ddk_latest +├── arm64-v8a +│   ├── libcpucl.so +│   ├── libhcl.so +│   ├── libhiai.so +│   ├── libhiai_ir.so +│   └── libhiai_ir_build.so +├── armeabi-v7a +│   ├── libcpucl.so +│   ├── libhcl.so +│   ├── libhiai.so +│   ├── libhiai_ir.so +│   └── libhiai_ir_build.so +└── include + ├── HiAiAippPara.h + ├── HiAiModelManagerService.h + ├── HiAiModelManagerType.h + ├── graph + │   ├── attr_value.h + │   ├── buffer.h + │   ├── common + │   │   └── secures\tl.h + │   ├── debug + │   │   └── ge_error_codes.h + │   ├── detail + │   │   └── attributes_holder.h + │   ├── graph.h + │   ├── model.h + │   ├── op + │   │   ├── all_ops.h + │   │   ├── array_defs.h + │   │   ├── const_defs.h + │   │   ├── detection_defs.h + │   │   ├── image_defs.h + │   │   ├── math_defs.h + │   │   ├── nn_defs.h + │   │   └── random_defs.h + │   ├── operator.h + │   ├── operator_reg.h + │   ├── tensor.h + │   └── types.h + └── hiai_ir_build.h +``` + +### NPU版本限制: +* 如果获取手机的ROM在100.320.xxx.xxx以下 + 报错 + ERROR: npu is installed but is below 100.320.xxx.xxx +* 如果没有npu或是非华为手机 : + 报错 + ERROR: GetRomVersion(ROM): npu is not installed or rom version is too low + +### 如何更新到最新的ROM去支持NPU? +* 到 设置 >> 系统和更新 >> 软件更新中检查最新的ROM版本并更新。 + +### 如何创建RKNPU编译环境? +1. 在`/third_party`下创建rknpu文件夹并进入,然后执行: `git clone https://github.com/airockchip/rknpu_ddk.git`。 +2. 在`/scripts/build_aarch64_linux.sh`文件中加入`-DTNN_RK_NPU_ENABLE:BOOL=ON`选项并编译即可。 + + +## 五、其他 +### 如何获取模型中间结果? +* 修改项目目录下 /source/tnn/utils/blob_dump_utils.h 中 +* \#define DUMP_INPUT_BLOB 0 --> #define DUMP_INPUT_BLOB 1,获取每层输入 +* \#define DUMP_OUTPUT_BLOB 0 --> #define DUMP_OUTPUT_BLOB 1,获取每层输出 +* 仅作为调试使用 + +### 七、如何获取模型各个layer耗时? 
+* 参考profiling文档[性能测试](./development/profiling.md) + +### 网络问题 +```text +//mac下homebrew安装 +//https://zhuanlan.zhihu.com/p/59805070 +//https://brew.sh/index_zh-cn +//替换国内镜像的安装脚本 +``` diff --git a/3rdparty/TNN/doc/cn/front_page.md b/3rdparty/TNN/doc/cn/front_page.md new file mode 100644 index 0000000..3efeab3 --- /dev/null +++ b/3rdparty/TNN/doc/cn/front_page.md @@ -0,0 +1,124 @@ +
+ +[English Version](../en/front_page_en.md) + +## 快速开始 + +使用TNN非常简单,如果你有一个已经训练好的模型, 那么一般而言通过以下三个步骤就能完成模型在目标平台上的部署。 +1. 第一步是把训练好的模型转换成TNN的模型,为此我们提供了丰富的工具来帮助你完成这一步,无论你使用的是Tensorflow、Pytorch、或者Caffe,都可以轻松完成转换。 +详细的手把手教程可以参见这里[如何转换模型](./user/convert.md)。 + +2. 当你完成了模型的转换,第二步就是编译目标平台的TNN引擎了,你可以根据自己的目标平台的硬件支持情况,选择CPU/ARM/OpenCL/Metal/NPU等加速方案。 + 对于这些平台,TNN都提供了一键编译的脚本,使用非常方便。详细步骤可以参考这里[如何编译TNN](./user/compile.md)。 + +3. 最后一步就是使用编译好的TNN引擎进行推理,你可以在自己的应用程序中嵌入对TNN的调用,这方面我们提供了丰富而详实的demo来帮助你完成。 + * [从0开始跑通一个iOS Demo](./user/demo.md) + * [从0开始跑通一个Android Demo](./user/demo.md) + +## 技术方案 + +TNN作为一个移动端高性能、轻量级的推断框架,同时拥有跨平台、高性能、模型压缩、代码裁剪等众多突出优势。TNN框架借鉴了业界主流开源框架的优点,沉淀和整合了优图实验室Rapidnet,ncnn框架上的积累,并联合深度学习框架OTeam各个部门(PCG,TEG,IEG),共同打造公司级统一移动端推断框架。 +目前,TNN已在各大实际业务中上线,其具有的以下特性获得了广泛的好评。 + +* 计算优化 + * 针对不同架构在硬件指令发射、吞吐、延迟、缓存带宽、缓存延迟、寄存器数量等特点,深度优化底层算子,极致利用硬件算力 + * 主流硬件平台(CPU: ARMv7, ARMv8, GPU: Mali, Adreno, Apple) 深度调优 + * CNN核心卷积运算通过Winograd, Tile-GEMM, Direct Conv等多种算法实现,保证不同参数、计算尺度下高效计算 + * Op融合:离线分析网络计算图,多个小Op(计算量小、功能较简单)融合运算,减少反复内存读取、kernel启动等开销 + +* 低精度优化 + * 支持INT8, FP16低精度计算,减少模型大小、内存消耗,同时利用硬件低精度计算指令加速计算 + * 支持INT8 WINOGRAD算法,(输入6bit), 在精度满足要求的情况下,进一步降低模型计算复杂度 + * 支持单模型多种精度混合计算,加速计算同时保证模型精度 + +* 内存优化 + * 高效”内存池”实现:通过DAG网络计算图分析,实现无计算依赖的节点间复用内存,降低90%内存资源消耗 + * 跨模型内存复用:支持外部实时指定用于网络内存,实现“多个模型,单份内存”。 + +* 主流模型实测性能:v0.1 2020.05.29 + +> 麒麟970: + +| model | cpu time(single thread, ms) | gpu time(ms) | npu time(ms) +|---------------------------|--------------|--------------|---------------| +| Mobilenet_v1 | 88 | 12 | 4.9 | +| Mobilenet_v1_int8 | 55 | | | +| Mobilenet_v2 | 58 | 11 | 8.0 | +| Mobilenet_v2_int8 | 41 | | | +| squeezenet_v1.0 | 127 | 20 | 5.1 | +| squeezenet_v1.0_int8 | 82 | | | + + +> 骁龙835: + + | model | cpu 1 thread(ms) | gpu time(ms) | + |---------------------------|--------------|--------------| + | Mobilenet_v1 | 94 | 16 | + | Mobilenet_v1_int8 | 62 | | + | Mobilenet_v2 | 61 | 14 | + | Mobilenet_v2_int8 | 47 | | + | squeezenet_v1.0 | 122 | 28 | + | squeezenet_v1.0_int8 | 93 
| | + + +> 骁龙845: + + +| model | cpu 1 thread(ms) | gpu time(ms) | +|---------------------------|--------------|--------------| +| Mobilenet_v1 | 60 | 10 | +| Mobilenet_v1_int8 | 37 | | +| Mobilenet_v2 | 39 | 8 | +| Mobilenet_v2_int8 | 28 | | +| squeezenet_v1.0 | 74 | 14 | +| squeezenet_v1.0_int8 | 56 | | + + +* TNN架构图: + + +
+ +* 通过ONNX支持TensorFlow, Pytorch, MxNet, Caffe等多种训练框架,充分利用和融入不断完善的ONNX开源生态。当前支持ONNX算子55个,近期会完善到约80个,覆盖主流CNN网络 +* 支持主流安卓、iOS、embedded Linux,windows操作系统,支持ARM CPU, GPU硬件平台(近期还会加入达芬奇NPU支持) +* 模块化设计,将模型解析、计算图构建、优化、底层硬件适配、高性能kernel实现各部分抽象隔离,通过Factory Mode注册、构建设备,方便接入更多的底层硬件、加速方案。 +* Runtime无任何第三方库依赖,CPU动态库尺寸仅约400KB,并提供基础图像变换操作,调用简单便捷。跨平台模型统一、调用接口统一,通过单个配置参数快速切换。 + +## 能力展示 +* [支持的算子](./user/support.md) +* [支持的网络](./user/support.md) +* [支持的架构](./user/support.md) +* [Benchmark性能测试方法](./development/profiling.md) + +## 使用手册 +* [从源码编译](./user/compile.md) +* [工具集]() + * [模型转换](./user/convert.md) + * [模型量化](./user/quantization.md) + * [模型可视化](https://lutzroeder.github.io/netron/) + * [性能分析工具](./development/profiling.md) + * [模型对齐工具](./development/model_check.md) + +## API文档 +* [API调用](./user/api.md) + +## 贡献者须知 +* [开发基础须知](./development/contributing.md) +* [架构详解](./development/architecture.md) +* [新增OP](./development/add_op.md) +* [单元测试](./development/unit_test.md) + +## Roadmap +* [Road map](./user/roadmap.md) + +## FAQ +* [FAQ 常见问题](./faq.md) + +## 加入我们 + +* 欢迎大家参与,协同共建,打造业界最好的移动端推理框架。 + +* 技术交流QQ群: 913940506 答案:TNN + +* QQ群二维码: +
diff --git a/3rdparty/TNN/doc/cn/get_started.md b/3rdparty/TNN/doc/cn/get_started.md new file mode 100644 index 0000000..be67e11 --- /dev/null +++ b/3rdparty/TNN/doc/cn/get_started.md @@ -0,0 +1,16 @@ +
+ +# 从0开始跑通一个Demo + +[English Version](../en/get_started_en.md) + +使用TNN非常简单,如果你有一个已经训练好的模型, 那么一般而言通过以下三个步骤就能完成模型在目标平台上的部署。 +1. 第一步是把训练好的模型转换成TNN的模型,为此我们提供了丰富的工具来帮助你完成这一步,无论你使用的是Tensorflow、Pytorch、或者Caffe,都可以轻松完成转换。 +详细的手把手教程可以参见这里[如何转换模型](./user/convert.md)。 + +2. 当你完成了模型的转换,第二步就是编译目标平台的TNN引擎了,你可以根据自己的目标平台的硬件支持情况,选择CPU/ARM/OpenCL/Metal/NPU等加速方案。 + 对于这些平台,TNN都提供了一键编译的脚本,使用非常方便。详细步骤可以参考这里[如何编译TNN](./user/compile.md)。 + +3. 最后一步就是使用编译好的TNN引擎进行推理,你可以在自己的应用程序中嵌入对TNN的调用,这方面我们提供了丰富而详实的demo来帮助你完成。 + * [从0开始跑通一个iOS Demo](./user/demo.md) + * [从0开始跑通一个Android Demo](./user/demo.md) diff --git a/3rdparty/TNN/doc/cn/imgs/blob_memory.png b/3rdparty/TNN/doc/cn/imgs/blob_memory.png new file mode 100644 index 0000000..21ff77d Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/blob_memory.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/device.png b/3rdparty/TNN/doc/cn/imgs/device.png new file mode 100644 index 0000000..5ee93fc Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/device.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/device_factory.png b/3rdparty/TNN/doc/cn/imgs/device_factory.png new file mode 100644 index 0000000..931df24 Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/device_factory.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/group.png b/3rdparty/TNN/doc/cn/imgs/group.png new file mode 100644 index 0000000..cc236ff Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/group.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/model_align.png b/3rdparty/TNN/doc/cn/imgs/model_align.png new file mode 100644 index 0000000..e523f27 Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/model_align.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/model_reinterpreter.png b/3rdparty/TNN/doc/cn/imgs/model_reinterpreter.png new file mode 100644 index 0000000..5af0188 Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/model_reinterpreter.png differ diff --git a/3rdparty/TNN/doc/cn/imgs/roadmap.jpg b/3rdparty/TNN/doc/cn/imgs/roadmap.jpg new file mode 100644 
index 0000000..7be2035 Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/roadmap.jpg differ diff --git a/3rdparty/TNN/doc/cn/imgs/tnn_architect.jpg b/3rdparty/TNN/doc/cn/imgs/tnn_architect.jpg new file mode 100644 index 0000000..20fef51 Binary files /dev/null and b/3rdparty/TNN/doc/cn/imgs/tnn_architect.jpg differ diff --git a/3rdparty/TNN/doc/cn/jobs.md b/3rdparty/TNN/doc/cn/jobs.md new file mode 100644 index 0000000..cdfb8a2 --- /dev/null +++ b/3rdparty/TNN/doc/cn/jobs.md @@ -0,0 +1 @@ +Todo: 工作机会 diff --git a/3rdparty/TNN/doc/cn/model_align.md b/3rdparty/TNN/doc/cn/model_align.md new file mode 100644 index 0000000..ff14501 --- /dev/null +++ b/3rdparty/TNN/doc/cn/model_align.md @@ -0,0 +1,145 @@ +# 模型对齐常见问题 + +[English Version](../en/model_align_en.md) + +在使用转换得到的TNN模型进行推理时,有时会遇到TNN模型的推理结果与原始模型不对齐的情况。此文档总结了模型不对齐问题的主要原因、常见的不对齐算子以及分析和处理不对齐问题的方法。模型不对齐问题的整体处理流程可参考下图。 + +
+ +## 一、模型对齐的验证与检查 + +### 1. 模型转换时使用-align检查对齐情况 + +TNN模型转换工具支持对齐功能,可以在模型转换时检查生成的TNN模型与源模型是否对齐。强烈建议在模型转换时打开对齐检查,具体文档请参考[模型转换文档](https://github.com/Tencent/TNN/blob/master/doc/cn/user/convert.md)。 + +### 2. 使用model_check工具检查对齐情况 + +对于已经转换完成的模型,TNN提供了**model_check**工具辅助模型对齐情况的验证。**model_check**工具主要用于比较TNN不同设备(例如ARM,OpenCL,Metal等)的执行结果是否与TNN CPU等价,当怀疑TNN在某些设备上的执行结果不正确时,可以使用此工具进行检查。 + +**model_check**工具可以方便地在指定设备上,使用给出的输入数据或随机生成数据执行TNN模型,并与TNN CPU的执行结果进行逐算子的比较,从而准确定位存在问题的算子。**model_check**的使用方法请参考[model_check文档](https://github.com/Tencent/TNN/blob/master/doc/cn/development/model_check.md)。 + +## 二、常见的模型对齐问题 + +如果模型转换工具成功生成了TNN模型,但在使用中发现了不对齐的情况,可以按照以下方法排查问题。 + +由于神经网络模型类型众多,且不同框架对算子的定义和支持不尽相同,再加之各框架的算子支持情况还会随版本变化,所以存在转换前后算子功能不完全等价的情况。下表按照源模型的类型,总结了在实践中遇到的可能存在对齐问题的算子,可用于快速定位可能存在对齐问题的算子。 + +|源模型|问题算子列表| +|-|-| +|Pytorch |upsample, batchnorm, AvgPool| +|TensorFlow |TODO| +|tflite |ResizeBilinear| +|onnx |TODO| + +### 1.tensorflow +TODO + +### 2.pytorch + +#### upsample + +问题描述:将pytorch模型转换为onnx模型时,onnx的upsample算子与pytorch不等价 + +解决方法:1)更新pytorch;2)导出onnx模型时,设置opset_version>=11,代码如下: +``` +torch.onnx.export(model, input, filename, verbose=False, + opset_version=11,...) # or other number greater than 11 +``` + +#### batchnorm + +问题描述:将pytorch模型转换为onnx模型时,没有将pytorch切换到推理模式,导致batchnorm参数不固定 + +解决方法:导出onnx模型前,切换pytorch到推理模式,代码如下: +```torch_model.eval()``` or ```torch_model.train(False)``` + +#### AvgPool + +问题描述:pytorch模型中的AvgPool算子有count_include_pad属性,取值可以为```True```或```False```,当前TNN仅支持count_include_pad=```Fasle```的情况。 + +解决方法:导出onnx模型前,修改AvgPool算子的count_include_pad为```False``` + + +### 3.tflite + +#### ResizeBilinear + +问题描述:含有ResizeBilinear的tflite模型使用-align可能会不对齐,这是由于TensorFlow2.3之前tflite的ResizeBilinear实现存在问题导致的 + +解决方法:升级TensorFlow让其版本不小于2.3即可 + +### 4.onnx +TODO + +## 三、模型对齐问题的分析与处理方法 + +在排查模型对齐问题时,最直接有效的方法就是对比模型在相同输入下的计算结果。这一过程需要将TNN模型中特定算子的计算结果与原始模型中对应算子的计算结果进行比较。这可以通过保存算子的输入与输出实现。 + +TNN支持逐层dump结果的功能,可以通过下面的方法获得每层的输入和输出结果。 + +### 1. 
打开blob dump功能 + +打开[source/tnn/utils/blob_dump_utils.h](https://github.com/Tencent/TNN/blob/master/source/tnn/utils/blob_dump_utils.h)文件,根据需要修改`DUMP_INPUT_BLOB`和`DUMP_OUTPUT_BLOB`两个宏。其中`DUMP_INPUT_BLOB`为`1`表示保存TNN模型每个算子的输入;设置`DUMP_OUTPUT_BLOB`为`1`表示保存每个算子的输出。 + +数据保存过程的调用在[source/tnn/core/default_network.cc](https://github.com/Tencent/TNN/blob/master/source/tnn/core/default_network.cc)的`Forward`方法中。 + +具体来说,TNN将算子的每个输入和输出保存在独立的txt文本文件中,文件名由**算子在模型中的顺序、算子名称以及输入和输出自身的形状等因素共同**决定。例如,假设模型的第2层名为*foo*,其第1个输入被保存在前缀为*00001-foo-in-0*的文件中;其第2个输出被保存在前缀为*00001-foo-out-1*的文件中。每层的计算结果按照*N-C-H-W*的顺序保存在文件内,每行保存一个元素。TNN模型各算子的输入输出信息可借助[**Netron**可视化工具](https://netron.app/)查看。 + +文件的保存目录由[source/tnn/utils/blob_dump_utils.cc](https://github.com/Tencent/TNN/blob/master/source/tnn/utils/blob_dump_utils.cc)中的变量 `g_tnn_dump_directory`控制,可以根据需要进行修改。 + +### 2. 使用指定输入,获得每层计算结果 + +考虑到保存数据的过程位于`Forward`方法中,我们可以通过调用`Forward`方法实现数据保存。此外,也可以借助TNN已有的工具执行这一过程,例如**TNNTest**工具。由于**TNNTest**默认使用异步方法执行推理,不调用`Forward`方法,所以需要进行修改。 + +具体修改方法如下:打开[test/test.cc](https://github.com/Tencent/TNN/blob/master/test/test.cc)文件,找到其中的`ForwardAsync`方法,并将其替换为`Forward`方法。在不了解**TNNTest**具体工作流程的情况下,建议对代码中的**2处调用**均进行替换。替换过程如下所示: +将 +``` + ret = instance->ForwardAsync(nullptr); +``` +替换为 +``` +ret = instance->Forward(); +``` + +由于上述修改均位于源代码中,因此修改后需要重新编译TNN。TNN的编译可参考[TNN编译文档](https://github.com/Tencent/TNN/blob/master/doc/cn/user/compile.md)。 + +编译后可以用**TNNTest**工具执行模型,并保存每层的输入和输出结果。可参考[TNNTest文档](https://github.com/Tencent/TNN/blob/master/doc/cn/user/test.md)了解**TNNTest**的使用方法和参数。 + +### 3. 获得源模型算子的计算结果 + +保存源模型算子结果的方法与源模型基于的框架紧密相关。这里以onnx模型为例,说明逐层保存模型结果的方法。 +- onnx模型:使用`onnxruntime`执行onnx模型,并保存每个算子的计算结果。 +``` +def forward_dump(model_path:str, input_data:numpy.ndarray) -> Dict[str, numpy.ndarray]: + # 1. Load onnx model + model = onnx.load(model_path) + onnx.checker.check_model(model) + model = copy.deepcopy(model) + + # 2. Prepare input data + input_data = {'input_name': input_data} + + # 3. 
Set the output of each operator as the output of the model + for node in model.graph.node: + for output in node.output: + model.graph.output.extend([onnx.ValueInfoProto(name=output)]) + + # 4. Use onnxruntime to execute onnx models + sess = onnxruntime.InferenceSession(model.SerializeToString()) + outputs = [x.name for x in sess.get_outputs()] + result = OrderedDict(zip(outputs, sess.run(outputs, input_data))) + # 5. save the data in 'result' + + return result +``` +`result`为一个`Dict`,将onnx模型中每个算子的`name`映射到该算子的计算结果(`numpy.ndarray`)。 + +## 四、提交issue + +当遇到了TNN模型对齐的问题后,可以[提交issue](https://github.com/Tencent/TNN/issues)将问题反馈给我们,我们会尽快进行修复。 + +为了方便我们复现和定位问题,请按照issue模板填写issue相关信息,并在描述问题时请尽量提供以下内容: +1. 原模型与TNN模型; +2. 指定的输入数据和参考计算结果; +3. 对齐时使用的环境与方法:例如onnxruntime的版本、tflite版本、tnn版本等; +4. 其他辅助信息:例如,已经定位到的不对齐算子等 diff --git a/3rdparty/TNN/doc/cn/user/api.md b/3rdparty/TNN/doc/cn/user/api.md new file mode 100644 index 0000000..80df1ae --- /dev/null +++ b/3rdparty/TNN/doc/cn/user/api.md @@ -0,0 +1,558 @@ +# API说明 + +[English Version](../../en/user/api_en.md) + +## 一、API兼容性 + +TNN所有对外暴露接口均通过PUBLIC宏显示声明,非暴露接口符号均不可见。 + +```cpp +#if defined _WIN32 || defined __CYGWIN__ + #ifdef BUILDING_DLL + #ifdef __GNUC__ + #define PUBLIC __attribute__ ((dllexport)) + #else + #define PUBLIC __declspec(dllexport) + #endif + #else + #ifdef __GNUC__ + #define PUBLIC __attribute__ ((dllimport)) + #else + #define PUBLIC __declspec(dllimport) + #endif + #endif + #define LOCAL +#else + #if __GNUC__ >= 4 + #define PUBLIC __attribute__ ((visibility ("default"))) + #define LOCAL __attribute__ ((visibility ("hidden"))) + #else + #define PUBLIC + #define LOCAL + #endif +#endif +``` + +不同版本API 兼容性遵守[语义化版本 2.0.0](https://semver.org/lang/zh-CN/)规则。 + +## 二、API调用 + +### 简介 +API调用主要对模型解析,网络构建,输入设定,输出获取四个步骤进行简要介绍,详细说明参见API详解部分。 + +### 步骤1. 
模型解析 + +```cpp +TNN tnn; +TNN_NS::ModelConfig model_config; +//proto文件内容存入proto_buffer +model_config.params.push_back(proto_buffer); +//model文件内容存入model_buffer +model_config.params.push_back(model_buffer); +Status ret = tnn.Init(model_config); +``` + +TNN模型解析需配置ModelConfig params参数,传入proto和model文件内容,并调用TNN Init接口即可完成模型解析。 + +### 步骤2. 网络构建 + +```cpp +TNN_NS::NetworkConfig config; +config.device_type = TNN_NS::DEVICE_ARM; +TNN_NS::Status error; +auto net_instance = tnn.CreateInst(config, error); +``` + +TNN网络构建需配置NetworkConfig,device_type可配置`DEVICE_ARM`, `DEVICE_OPENCL`, `DEVICE_METAL`, `DEVICE_X86`, `DEVICE_CUDA`, `DEVICE_HUAWEI_NPU`, `DEVICE_RK_NPU`等多种加速方式,通过CreateInst接口完成网络的构建。 + + +### 步骤3. 输入设定 + +```cpp +auto status = instance->SetInputMat(input_mat, input_cvt_param); +``` + +TNN输入设定通过调用SetInputMat接口完成,需要传入的数据保存在`input_mat`中。 + +### 步骤4. 网络运行 + +```cpp +auto status = instante->Forward(); +``` +TNN Forward接口为同步调用接口,ForwardAsync接口为异步调用接口。 + +### 步骤5. 输出获取 + +```cpp +auto status = instance->GetOutputMat(output_mat); +``` + +TNN输出获取通过调用GetOutputMat接口完成,输出结果将按照特定格式保存在`output_mat`中。 + +## 二、API详解 + +### API目录结构 + +```bash +. +└── tnn + ├── core + │   ├── blob.h # 负责数据传递 + │   ├── common.h # 定义常用结构 + │   ├── instance.h # 网络实例 + │   ├── macro.h # 常用宏定义 + │   ├── mat.h # 输入接口,类cv::Mat + │   ├── status.h # 接口状态 + │   └── tnn.h # 模型解析 + ├── utils + │   ├── bfp16_utils.h # bfp16转换工具 + │   ├── blob_converter.h # blob输入输出转换工具 + │   ├── cpu_utils.h # CPU性能特定优化工具 + │   ├── data_type_utils.h # 数据类型转换工具 + │   ├── dims_vector_utils.h # 尺寸计算工具 + │   ├── half_utils.h # fp16转换工具 + │   ├── mat_utils.h # Mat转换工具 + │   └── string_utils.h # 字符串转换工具 + └── version.h # 编译构建信息 +``` + +### 1. 
core/common.h + +`DataType`:定义不同数据类型枚举值。 +`DataFormat`:定义Blob Data不同数据排布方式。 +`NetworkType`:定义不同网络构建类型,默认构建TNN网络,支持第三方库网络构建。 +`DeviceType`:用于指定网络运行设备及加速方式。 +`ModelType`:定义模型类型,TNN默认解析模型为TNN模型,同时支持其他第三方库模型格式传入。 +`Precision `: 定义网络运行精度。 + +```cpp +struct PUBLIC ModelConfig { + + ModelType model_type = MODEL_TYPE_TNN; + + // tnn model need two params: order is proto content, model content. + // ncnn need two: params: order is param content, bin content. + // openvino model need two params: order is xml content, model path. + // coreml model need one param: coreml model directory path. + // snpe model need one param: dlc model directory path. + // hiai model need two params: order is model name, model file path. + // atlas model need one param: config string. + std::vector params; +}; +``` + +ModelConfig参数说明: + +- `model_type`: TNN当前开源版本仅支持传入`MODEL_TYPE_TNN`, `MODEL_TYPE_NCNN`, `MODEL_TYPE_COREML` 模型格式。 +- `params`: TNN模型需传入proto文件内容以及model文件路径。NCNN模型需传入param文件内容以及bin文件路径, COREML模型需传入coreml 模型所在目录路径。 + + +```cpp +struct PUBLIC NetworkConfig { + // device type default cpu + DeviceType device_type = DEVICE_ARM; + + // device id default 0 + int device_id = 0; + + // blob data format, auto decided by device + DataFormat data_format = DATA_FORMAT_AUTO; + + // network type, auto decided by device + NetworkType network_type = NETWORK_TYPE_AUTO; + + // raidnet instances not share memory with others + ShareMemoryMode share_memory_mode = SHARE_MEMORY_MODE_DEFAULT; + + // dependent library path + std::vector library_path = {}; + + // compute precision + Precision precision = PRECISION_AUTO; + + // cache path to store possible cache models or opt kernel or opencl program cache + std::string cache_path = ""; + + // network init or reshape may cost more time to select opt kernel implement if enable tune kernel + // cache_path can set to store tune kernel info. 
+ bool enable_tune_kernel = false; +}; +``` + +NetworkConfig参数说明: + +- `device_type`: 默认为`DEVICE_ARM`。 当前已支持 `DEVICE_NAIVE`、`DEVICE_ARM`、`DEVICE_X86`、`DEVICE_OPENCL`、`DEVICE_METAL`、`DEVICE_CUDA`、`DEVICE_HUAWEI_NPU`、`DEVICE_RK_NPU`。 +- `device_id`: 默认为0,多个设备支持通过`device_id`选择,当前仅`DEVICE_CUDA`需配置此参数指定gpu id。 +- `data_format`: 默认为tnn自动选择blob数据排布方式进行加速,可通过此参数设定特定blob数据排布进行加速。 +- `network_type`: 默认根据`device_type`自动选择网络类型,可指定构建网络类型。 +- `share_memory_mode`: tnn instance 内存共享方式。 +- `library_path`: 支持外部依赖库加载,iOS metal kernel库放在app非默认路径需配置此参数。 +- `precision`: 网络精度类型,默认根据不同的`device_type`自动选择精度。 +- `cache_path`: 华为NPU指定cache路径可存放运行过程中转出的om文件,后续运行可直接通过加载cache路径对应om文件。OpenCL指定cache路径可缓存编译好的kernel二进制文件,后续初始化可直接通过二进制cache文件创建kernel, `enable_tune_kernel` 打开,可通过指定cache路径存放tune参数,后续可直接加载tune参数而无需每次运行都tune kernel。 + + +```cpp +typedef enum { + // default + SHARE_MEMORY_MODE_DEFAULT = 0, + // same thread tnn instance share blob memory + SHARE_MEMORY_MODE_SHARE_ONE_THREAD = 1, + // set blob memory from external, different thread share blob memory need + // synchronize + SHARE_MEMORY_MODE_SET_FROM_EXTERNAL = 2 +} ShareMemoryMode; +``` + +ShareMemoryMode参数说明: + +- `SHARED_MEMORY_MODE_DEFAULT`: 仅支持同一instance不同blob间内存共享。 +- `SHARE_MEMORY_MODE_SHARE_ONE_THREAD`: 支持同一线程的不同Instance内存共享。 +- `SHARE_MEMORY_MODE_SET_FROM_EXTERNAL`: 支持instance内存由外部传入,共享方式由调用侧决定,线程间共享需处理同步问题,内存分配释放均需调用侧维护。 + +### 2. core/tnn.h + +```cpp +class PUBLIC TNN { +public: + ... + + // init tnn implement, interpret model. + Status Init(ModelConfig& config); + + // denit tnn implement, release model interpreter. + Status DeInit(); + + // add output to the model. + // if output_name of blob not found, then search output_index of layer. + Status AddOutput(const std::string& output_name, int output_index = 0); + + // return input shapes map from model + Status GetModelInputShapesMap(InputShapesMap& shapes_map); + + // create tnn network instance with network config and inputs shape. 
+ // if inputs shape not set, use default from model. + std::shared_ptr CreateInst( + NetworkConfig& config, Status& status, + InputShapesMap inputs_shape = InputShapesMap()); + + // create tnn network instance with network config and min max inputs shape, + // instance reshape can support range from min inputs shape to max inputs shape. + std::shared_ptr CreateInst( + NetworkConfig& config, Status& status, + InputShapesMap min_inputs_shape, InputShapesMap max_inputs_shape); + + ... +}; +``` + +TNN接口说明: + +- Init接口:负责模型数据传入并解析,需配置并传入ModelConfig。 +- DeInit接口: 负责tnn implement释放,默认析构函数可自动释放。 +- AddOutput接口:支持增加模型输出,可将网络任意一层输出定义为模型输出。 +- GetModelInputShapesMap接口: 获取模型解析出的模型输入尺寸。 +- CreateInst接口:负责网络实例Instance构建,如果运行过程中支持输入维度可变,需配置`min_inputs_shape`和`max_inputs_shape`指定输入每个维度支持的最大最小尺寸。 + +### 3. core/instance.h + +```cpp +class PUBLIC Instance { +public: + Instance(NetworkConfig& net_config, ModelConfig& model_config); + + ~Instance(); + + // init with model interpeter and inputs shape. + Status Init(std::shared_ptr interpreter, InputShapesMap inputs_shape); + + // deinit, release network + Status DeInit(); + + // return memory bytes required for forward + Status GetForwardMemorySize(int& memory_size); + + // set memory to tnn instance. if success, return status code zero. + // only instance created with SHARE_MEMORY_MODE_SET_FROM_EXTERNAL can be set from external. + // the memory size need >= GetForwardMemorySize(). + // releasing or otherwise using the memory for other purposes during the tnn network run + // will result in undefined behavior. + Status SetForwardMemory(void* memory); + + // reshape instance with new input shapes + Status Reshape(const InputShapesMap& inputs); + + // get tnn command queue + Status GetCommandQueue(void** command_queue); + + // @brief tnn instance network infer, it will wait until all layer infer complete. + Status Forward(); + + ... + + // tnn instance network infer async. + // device gpu, all layer infer complete will call Callback. 
+ Status ForwardAsync(Callback call_back); + + // get all input blobs + Status GetAllInputBlobs(BlobMap& blobs); + + // get all output blobs + Status GetAllOutputBlobs(BlobMap& blobs); + + // set threads run on cpu + virtual Status SetCpuNumThreads(int num_threads); + ... + + // set input Mat, if input_name is not set, take the first input as default + Status SetInputMat(std::shared_ptr mat, + MatConvertParam param, + std::string input_name = ""); + + // get output Mat, if output_name is not set, take the first output as default + Status GetOutputMat(std::shared_ptr& mat, + MatConvertParam param = MatConvertParam(), + std::string output_name = "", + DeviceType device = DEVICE_ARM, MatType mat_type = NCHW_FLOAT); + +}; +``` + +Instance接口说明: + +- `Instance`和`Init`接口均由TNN CreateInst接口实现调用,用于生成Instance网络实例。 +- `GetForwardMemorySize`可获取Instance所有Blob所需内存大小,`SetForwardMemory`用于传入外部内存。对于`SHARE_MEMORY_MODE_SET_FROM_EXTERNAL`内存模式构建的Instance,内存需由外部传入, 传入内存实际大小不得小于`GetForwardMemorySize`返回值大小。 +- `Reshape`接口支持网络构建成功后重新设定输入尺寸,仅通过`min_inputs_shape`和`max_inputs_shape` 构建的网络可在运行过程中改变输入尺寸,可变尺寸范围由`min_inputs_shape`和`max_inputs_shape` 指定。 +- `GetCommandQueue`接口支持获取网络运行对应的command queue,同一command queue消息顺序执行。 +- `GetAllInputBlobs`和 `GetAllOutputBlobs`分别用于获取输入输出blob。 +- `SetCpuNumThreads`可设置CPU线程并行数。 +- `Forward`为网络运行同步接口,`ForwardAsync`为网络运行异步接口。 +- `SetInputMat`用于设定输入Mat,其中MatConvertParam可设定[转换参数](#MatConvertParam参数说明)。对于多输入网络,可用`input_name`区分。 +- `GetOutputMat`用于获取输出结果并保存在输出Mat中,其中MatConvertParam可设定[转换参数](#MatConvertParam参数说明)。对于多输出网络,可用`output_name`区分,DeviceType可指定输出Mat Memory构建在CPU还是GPU,MatType可用于设定输出Mat数据排列方式。 + + +### 4. core/mat.h + +```cpp +class PUBLIC Mat { +public: + ... 
+ + Mat(DeviceType device_type, MatType mat_type, DimsVector shape_dims, void* data); + Mat(DeviceType device_type, MatType mat_type, DimsVector shape_dims); + //empty mat + Mat(DeviceType device_type, MatType mat_type); + + DEPRECATED("use Mat(DeviceType, MatType, DimsVector, void*) instead") + Mat(DeviceType device_type, MatType mat_type, void* data) : Mat(device_type, mat_type, {1,0,0,0}, data) {}; + + ... +}; +``` +其中MatType支持常用的CV, NLP输入输出布局,且`DeviceType`可设定为CPU,GPU。 + +```cpp +typedef enum { + INVALID = -1, + //bgr or rgb: uint8 + N8UC3 = 0x00, + //bgra or rgba: uint8 + N8UC4 = 0x01, + //gray: uint8 + NGRAY = 0x10, + //YUV420SP, YYYYVUVUVU + NNV21 = 0x11, + //YUV420SP, YYYYUVUVUV + NNV12 = 0x12, + //nchw: float + NCHW_FLOAT = 0x20, + // nchw: int32 + NC_INT32 = 0x21, + ... +} PUBLIC MatType; +``` + +### 5. core/macro.h +提供不同平台Log宏,不同数据类型最大最小值宏,PUBLIC宏定义,以及部分数据pack转换等宏定义。 + +### 6. core/status.h +`Status`定义于status.h头文件中。 + +```cpp +enum StatusCode { + + TNN_OK = 0x0, + + // param errcode + TNNERR_PARAM_ERR = 0x1000, + TNNERR_INVALID_NETCFG = 0x1002, + ... +} + +class PUBLIC Status { +public: + Status(int code = TNN_OK, std::string message = "OK"); + + Status &operator=(int code); + + bool operator==(int code_); + bool operator!=(int code_); + operator int(); + operator bool(); + std::string description(); + +private: + int code_; + std::string message_; +} +``` +当Status code不为TNN_OK,通过`description`接口可返回错误描述信息。 + +### 7. core/blob.h + +```cpp +// @brief BlobDesc blob data info +struct PUBLIC BlobDesc { + // device_type describes devie cpu, gpu, ... + DeviceType device_type = DEVICE_NAIVE; + // data_type describes data precion fp32, in8, ... + DataType data_type = DATA_TYPE_FLOAT; + // data_format describes data order nchw, nhwc, ... 
+ DataFormat data_format = DATA_FORMAT_AUTO; + // DimsVector describes data dims + DimsVector dims; + // name describes the blob name + std::string name; + + std::string description(bool all_message = false); +}; + +struct PUBLIC BlobHandle { + void *base = NULL; + uint64_t bytes_offset = 0; +}; + +// @brief Blob tnn data store and transfer interface. +class PUBLIC Blob { +public: + ... + + //@brief create Blob with blob descript and data handle + Blob(BlobDesc desc, BlobHandle handle); + + ... +}; + +``` + +Blob当前主要由`BlobDesc`以及`BlobHandle`构成,其中`BlobDesc`描述Blob相关结构信息,`BlobHandle`用于读取和存储Blob数据。 + +`BlobDesc`用于描述`device_type`, `data_type`, `data_format`, `dims`, `name`信息。 + +dims描述blob维度信息,dims存储尺寸与data_format无关: +- dims尺寸为2,存储对应N, C。 +- dims尺寸为4,存储尺寸对应N,C,H,W。 +- dims尺寸为5,存储尺寸对应N,C,D,H,W。 + +当前不同平台blob输入输出数据类型及排布如下: + +- `ARM`:CPU内存, NC4HW4. +- `OPENCL`: GPU显存(clImage), NHC4W4. 其中NH为clImage高,C4W4为clImage宽。 +- `METAL`: GPU显存(metal), NC4HW4. +- `HUAWEI_NPU`: CPU内存, NCHW. +- `X86`: CPU内存,NCHW。 +- `CUDA`: GPU内存, NCHW。 + +其中最后4代表pack 4, C4代表最后1位4由4个C进行pack。 + + +### 8. utils/mat\_utils.h +```cpp +class PUBLIC MatUtils { +public: + //copy cpu <-> device, cpu<->cpu, device<->device, src and dst dims must be equal. + static Status Copy(Mat& src, Mat& dst, void* command_queue); + + //src and dst device type must be same. when param scale_w or scale_h is 0, it is computed as + // (double)dst.GetWidth() / src.GetWidth() or (double)dst.GetHeight() / src.GetHeight(). + static Status Resize(Mat& src, Mat& dst, ResizeParam param, void* command_queue); + + //src and dst device type must be same. when param width or height is 0, it is equal to + //dst.GetWidth() or dst.GetHeight(). + static Status Crop(Mat& src, Mat& dst, CropParam param, void* command_queue); + + //src and dst device type must be same. + static Status WarpAffine(Mat& src, Mat& dst, WarpAffineParam param, void* command_queue); + + //src and dst device type must be same. 
+ static Status CvtColor(Mat& src, Mat& dst, ColorConversionType type, void* command_queue); + + //src and dst device type must be same. param top, bottom, left and right must be non-negative. + static Status CopyMakeBorder(Mat& src, Mat& dst, CopyMakeBorderParam param, void* command_queue); +}; +``` + +接口参数说明: + +- `Copy`: 支持不同DEVICE与CPU Mat数据拷贝,以及相同DEVICE间Mat数据拷贝。 +- `Resize `、`Crop`、`WarpAffine `、`CvtColor `、`CopyMakeBorder` 接口行为类似OpenCV,CPU与GPU均支持,`src` 和 `dst` 需拥有相同的`DEVICE_TYPE`。 + + +### 9. utils/bfp16\_utils.h +接口提供了cpu内存fp32和bfp16转换工具。 + +### 10. utils/blob\_convert.h +```cpp +class PUBLIC BlobConverter { +public: + explicit BlobConverter(Blob* blob); + virtual Status ConvertToMat(Mat& image, MatConvertParam param, void* command_queue); + virtual Status ConvertFromMat(Mat& image, MatConvertParam param, void* command_queue); + + virtual Status ConvertToMatAsync(Mat& image, MatConvertParam param, void* command_queue); + virtual Status ConvertFromMatAsync(Mat& image, MatConvertParam param, void* command_queue); + +private: + Blob* blob_; + std::shared_ptr impl_ = nullptr; +}; +``` + +通过`ConvertToMat`可将blob数据按照Mat格式传入Mat,`ConvertFromMat`可将Mat数据按照blob格式传入blob, 接口对应的`command_queue`可通过 Instance `GetCommandQueue`接口获取。 + +接口提供常用预处理,后处理支持,支持设定scale, bias参数以及reverse channel适配bgr, rgb等场景。 + +```cpp +struct PUBLIC MatConvertParam { + std::vector scale = {1.0f, 1.0f, 1.0f, 1.0f}; + std::vector bias = {0.0f, 0.0f, 0.0f, 0.0f}; + bool reverse_channel = false; +}; +``` + +#### MatConvertParam参数说明: +- `reverse_channel`: 默认为`false`,若需要交换图像的B和R维度,可将此参数设置为`true`。 + * 仅`N8UC3`和`N8UC4`类型的Mat支持reverse_channel,其他类型的Mat会忽略该参数。 + * `ConvertFromMat`和`ConvertToMat`过程都支持reverse_channel。 +- `scale`和`bias`: scale默认为 `1`,bias默认为`0`,计算顺序为先乘scale,再加bias。 + * 所有类型的Mat都支持scale和bias。 + * `ConvertFromMat`和`ConvertToMat`过程都支持scale和bias。 + * 若指定的scale全为`1`,且bias全为`0`,或者使用默认的scale和bias值,则不做乘scale和加bias操作;否则用户需提供与channel大小对应的scale和bias值。 + * 
对于多维数据,scale和bias中的数值顺序和推理过程使用的数据格式保持一致。例如,若模型实际使用BGR格式进行推理,则`ConvertFromMat`和`ConvertToMat`过程,无论reverse_channel与否,scale和bias都需按照BGR顺序指定。也可理解为,`ConvertFromMat`先reverse channel,再乘scale和加bias;`ConvertToMat`先乘scale和加bias,再reverse channel。 + +### 11. utils/cpu\_utils.h +提供CPU线程核绑定以及省电模式等设定相关工具。 + +### 12. utils/data\_type\_utils.h +提供DataType尺寸和名称转换相关工具。 + +### 13. utils/dims\_vector\_utils.h +提供常用blob dims计算比较工具。 + +### 14. utils/half\_utils.h +接口提供cpu内存fp32和fp16转换工具。 + +### 15. utils/string\_utils.h +接口提供uchar string 到std::string的转换,主要用于TNN模型内存输入。 + +### 16. version.h +构建版本信息 diff --git a/3rdparty/TNN/doc/cn/user/caffe2tnn.md b/3rdparty/TNN/doc/cn/user/caffe2tnn.md new file mode 100644 index 0000000..ac92739 --- /dev/null +++ b/3rdparty/TNN/doc/cn/user/caffe2tnn.md @@ -0,0 +1,139 @@ +# Caffe 模型转换为 ONNX 模型 + +[English Version](../../en/user/caffe2tnn_en.md) + +要将 Caffe 模型转换为 TNN 模型,首先将 Caffe 模型转换为 ONNX 模型,然后再将ONNX 模型转换为 TNN 模型。 + +将 Caffe 模型转换为ONNX,我们借助于 caffe2onnx 工具, 它可以直接将 Caffe 模型转换为 ONNX 模型。在下面的文档中,会简单的介绍如何使用 caffe2onnx进行转换,然后建议参考 [onnx2tnn](onnx2tnn.md) 的相关文档,再将 ONNX 模型转换为 TNN。 + + +## 1. 环境搭建(Mac and Linux) + +- 安装protobuf(version >= 3.4.0) + +Macos: +```shell script +brew install protobuf +``` + +Linux: + +对于 linux 系统,我们建议参考protobuf 的官方[README](https://github.com/protocolbuffers/protobuf/blob/master/src/README.md)文档,直接从源码进行安装。 + +如果你使用的是Ubuntu 系统可以使用下面的指令进行安装: +```shell script +sudo apt-get install libprotobuf-dev protobuf-compiler +``` + +- 安装python (version >=3.6) + +Macos +```shell script +brew install python3 +``` +centos: +```shell script +yum install python3 python3-devel +``` + +- onnx(version == 1.6.0) +```shell script +pip3 install onnx==1.6.0 +``` + +- numpy(version >= 1.17.0) +```shell script +pip3 install numpy +``` + +## 2. caffe2onnx 工具使用 +- 进入工具目录 +``` shell script +cd /tools/caffe2onnx/ +``` +- caffe 格式转换 + +目前 caffe2onnx 的工具目前只支持最新版本的 caffe 的格式,所以在使用 caffe2onnx +工具之前需要将老版本的 caffe 网络和模型转换为新版. caffe 自带了工具可以把老版本的 +caffe 网络和模型转换为新版本的格式. 
具体的使用方式如下: +```shell script +upgrade_net_proto_text [老prototxt] [新prototxt] +upgrade_net_proto_binary [老caffemodel] [新caffemodel] +``` +修改后的输入的格式如下所示: + +```text +layer { + name: "data" + type: "input" + top: "data" + input_param { shape: { dim: 1 dim: 3 dim: 224 dim: 224 } } +} +``` +- caffe2onnx 工具的使用 + +```shell script +python3 convert2onnx.py ./test.prototxt ./test.caffemodel -o ./test.onnx -align -input_file=in.txt -ref_file=ref.txt +``` + +```text +usage: convert2onnx.py [-h] [-o ONNX_FILE] proto_file caffe_model_file + +convert caffe model to onnx + +positional arguments: + proto_file the path for prototxt file, the file name must end with + .prototxt + caffe_model_file the path for caffe model file, the file name must end with + .caffemodel! + +optional arguments: + -h, --help show this help message and exit + -o OUTPUT_DIR the output tnn directory + -v v1.0 the version for model, default v1.0 + -optimize If the model has fixed input shape, use this option to optimize the model for speed. On the other hand, if the model has dynamic input shape, dont use this option. It may cause warong result + -half save model using half + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for + the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference + data to compare the results. +``` +注意:当前仅支持单输入单输出模型和单输入多输出模型。 align 只支持 FP32 模型的校验,所以使用 align 的时候不能使用 half。 + +## 3. 
caffe2onnx 支持的算子 + +| Number | caffe layer | onnx operator | +| ------ | -------------------- | --------------------------------------------------- | +| 1 | BatchNorm | BatchNormalization | +| 2 | BatchNorm + Scale | BatchNormalization | +| 3 | Concat | Concat | +| 4 | Convolution | Conv | +| 5 | ConvolutionDepthwise | Conv | +| 6 | Crop | Slice | +| 7 | Deconvolution | ConvTranspose | +| 8 | DetectionOutput | DetectionOutput(customer defination) | +| 9 | Dropout | Dropout | +| 10 | Eltwise | Mul/Add/Max | +| 11 | Flatten | Reshape | +| 12 | InnerProduct | Reshape + Gemm | +| 13 | LRN | LRN | +| 14 | MaxUnPool | MaxUnPool | +| 15 | MVN | InstanceNorm | +| 16 | PReLU | PRelu | +| 17 | Permute | Transpose | +| 18 | Pooling | MaxPool/AveragePool/GlobalMaxPool/GlobalAveragePool | +| 19 | Power | Mul/Add/Pow | +| 20 | PriorBox | PriorBox(customer defination) | +| 21 | ReLU | Relu/LeakyRelu | +| 22 | ReLU6 | Clip | +| 23 | Reshape | Reshape | +| 24 | Scale | Mul + Reshape | +| 25 | ShuffleChannel | Reshape + Transpose + Reshape | +| 26 | Sigmoid | Sigmoid | +| 27 | Slice | Slice | +| 28 | Softmax | Softmax | +| 29 | Upsample | Resize | + diff --git a/3rdparty/TNN/doc/cn/user/compile.md b/3rdparty/TNN/doc/cn/user/compile.md new file mode 100644 index 0000000..831a8e8 --- /dev/null +++ b/3rdparty/TNN/doc/cn/user/compile.md @@ -0,0 +1,255 @@ +# 从源代码编译 + +[English Version](../../en/user/compile_en.md) + +## 一、iOS库编译 +### 1. 编译环境要求 + - Mac系统, Xcode IDE + - cmake(使用3.1及以上版本) + +### 2. 编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2)执行编译脚本 +``` +./build_ios.sh +``` +编译过程中如果出现xcrun、metal或metallib命令找不到,可尝试如下命令。 +``` +sudo xcode-select -s /Applications/Xcode.app/Contents/Developer/ +``` +编译完成后,在目录`platforms/ios`下产生`tnn.framework`库和`tnn.bundle`资源 +3)添加到工程 + + - 在iOS app工程的根目录中添加`tnn.framework`库和`tnn.bundle`资源; + - 在app Xcode工程的设置中找到`Build Setting -> Linking -> Other Linker Flags`选项; + - 添加`-force_load "$(path_to_tnn)/tnn.framework/tnn"`; + +### 3. 
限制说明 + +当前编译出的`tnn.framework`支持iOS设备上跑CPU和GPU,在Mac设备上当前仅支持跑GPU,CPU的支持在后续版本迭代中支持。 + +## 二、Android库编译 +### 1. 环境要求 +#### 依赖库 + - cmake(使用3.6及以上版本) + +#### NDK配置 + - 下载ndk版本(>=15c) + - 若要支持ARMv8.2编译,ndk版本版本至少为r18b + - 配置环境变量 `export ANDROID_NDK=` +### 2. 命令依赖 +centos: +```shell script +yum install attr.x86_64 +``` +ubuntu: +```shell script +sudo apt-get install attr +``` +### 3. 编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2)编辑`build_android.sh`修改配置选项 +``` + ABIA32="armeabi-v7a with NEON" + ABIA64="arm64-v8a" + STL="c++_static" + SHARED_LIB="ON" # ON表示编译动态库,OFF表示编译静态库 + ARM="ON" # ON表示编译带有Arm CPU版本的库 + OPENMP="ON" # ON表示打开OpenMP + OPENCL="ON" # ON表示编译带有Arm GPU版本的库 + HUAWEI_NPU="ON" # ON表示编译带有Arm GPU NPU版本的库 + SHARING_MEM_WITH_OPENGL=0 # 1表示OpenGL的Texture可以与OpenCL共享 +``` +华为NPU PS: +运行前需要下载DDK, 并放到指定文件夹。 或是用脚本直接下载具体请参考: +[FAQ](../faq.md)如何创建华为NPU编译环境? + +3)执行编译脚本 +``` +./build_android.sh +``` + +编译完成后,在当前目录的`release`目录下生成对应的`armeabi-v7a`库,`arm64-v8a`库和`include`头文件。如果是编译成静态库,集成链接需添加`-Wl,--whole-archive tnn -Wl,--no-whole-archive`。 + +## 三、ARM Linux跨平台交叉编译 + +### 1. 环境要求 +#### 依赖库 + - cmake(使用3.1及以上版本) + - 交叉编译需要安装编译工具链 + - ubuntu: aarch64: sudo apt-get install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu + arm32hf: sudo apt-get install g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf + - other linux: 下载arm toolchain: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads +### 2. 
编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2)编辑`build_aarch_linux.sh` 或 `build_armhf_linux.sh` 修改配置选项 +``` + SHARED_LIB="ON" # ON表示编译动态库,OFF表示编译静态库 + ARM="ON" # ON表示编译带有Arm CPU版本的库 + OPENMP="ON" # ON表示打开OpenMP + OPENCL="OFF" # ON表示编译带有Arm GPU版本的库 + RKNPU="OFF" # ON表示编译带有RKNPU版本的库 + #ARM64: + CC=aarch64-linux-gnu-gcc # 指定C编译器 + CXX=aarch64-linux-gnu-g++ # 指定C++编译器 + TARGET_ARCH=aarch64 # 指定指令架构 + #ARM32HF: + CC=arm-linux-gnueabihf-gcc + CXX=arm-linux-gnueabihf-g++ + TARGET_ARCH=arm +``` +3)执行编译脚本 +``` +./build_aarch_linux.sh +``` +RKNPU : 运行前需要下载DDK, 并放到指定文件夹。具体请参考: +[FAQ](../faq.md#如何创建rknpu编译环境)如何创建RKNPU编译环境? + +## 四、Linux 环境编译 +### 1.环境要求 +依赖库 + - cmake (使用3.11版本及以上) + - 网络访问 + +### 2.编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2) 执行编译脚本 + - 编译不带openvino的版本 +``` +./build_linux_native.sh +``` + - 编译带openvino的版本 +``` +./build_x86_linux.sh +``` +注意:openvino只能编译成64位的库,且cmake版本必须要求3.13以上 + +## 五、Linux CUDA库编译 +### 1.环境要求 +#### 依赖库 + - cmake (使用3.8及以上版本) + - CUDA (使用10.2及以上版本) + +#### TensorRT配置 + - 下载TensorRT(>=7.1) + - 配置环境变量 `export TENSORRT_ROOT_DIR=` + +#### CuDNN配置 + - 下载CuDNN(>=8.0) + - 配置环境变量 `export CUDNN_ROOT_DIR=` + +### 2.编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2) 执行编译脚本 +``` +./build_cuda_linux.sh +``` + +## 六、Windows 环境编译 +### 1.环境要求 +依赖库 + - Visual Studio (2017 及更高版本) + - cmake (把3.11及以上版本cmake加入环境变量或使用 Visual Studio 自带cmake) + - ninja (编译速度更快,可以使用choco安装) + +### 2.编译步骤 +打开 `x64 Native Tools Command Prompt for VS 2017/2019`,如果想要编译32位的库,打开 `x86 Native Tools Command Prompt for VS 2017/2019` +1) 切换到脚本目录 +``` +cd /scripts +``` +2) 执行编译脚本 + - 编译不带openvino的版本 +``` +.\build_msvc_naive.bat +``` + - 编译带openvino的版本 +``` +.\build_msvc.bat +``` +openvino只能编译成64位的库,更多编译问题请参考 [FAQ](openvino.md) + +## 七、Windows CUDA 环境编译 +### 1.环境要求 +依赖库 + - Visual Studio (2017 及更高版本) + - cmake (把3.11及以上版本cmake加入环境变量或使用 Visual Studio 自带cmake) + - CUDA (使用10.2及以上版本) 并且确保 `CUDA_PATH` 加入了环境变量 + +#### TensorRT配置 + - 下载TensorRT(>=7.1) + - 在脚本文件 *build_cuda_msvc.bat* 中修改 `set TENSORRT_ROOT_DIR=` + 
+#### CuDNN配置 + - 下载CuDNN(>=8.0) + - 在脚本文件 *build_cuda_msvc.bat* 中修改 `set CUDNN_ROOT_DIR=` + +### 2.编译步骤 +打开 `x64 Native Tools Command Prompt for VS 2017/2019` 或配置了cmake环境变量的 `cmd` +1) 切换到脚本目录 +``` +cd /scripts +``` +2) 执行编译脚本 +``` +.\build_cuda_msvc.bat +``` + +## 八、Macos 环境编译 +### 1.环境要求 +依赖库 + - cmake 3.11 以上版本 + - xcode command line tools (需提前在应用商店安装好Xcode,然后再命令行执行xcode-select --install ) + - automake, libtool (可通过brew安装,指令是brew install libtool, brew install automake) + - 网络访问 + +### 2.编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2)执行编译脚本 +``` +./build_macos.sh +``` + +## 编译参数option说明 + +|Option|默认值|说明| +|------|:---:|----| +|TNN_CPU_ENABLE| ON | 代码source/device/cpu编译开关,实现全部为c++代码,不包含特定CPU加速指令。| +|TNN_X86_ENABLE| OFF | 代码source/device/x86编译开关, 当前适配openvino实现,后续会迁入更多加速代码实现。| +|TNN_ARM_ENABLE| OFF | 代码source/device/arm编译开关,代码包含neon加速指令, 且部分实现了int8加速。| +|TNN_ARM82_ENABLE| OFF | 代码source/device/arm/acc/compute_arm82编译开关,代码包含fp16指令加速。| +|TNN_METAL_ENABLE| OFF | 代码source/device/metal编译开关,代码包含metal加速指令。| +|TNN_OPENCL_ENABLE| OFF | 代码source/device/opencl编译开关,代码包含opencl加速指令。| +|TNN_CUDA_ENABLE| OFF | 代码source/device/cuda编译开关,当前适配TensorRT实现,后续会迁入更多加速代码实现。| +|TNN_DSP_ENABLE| OFF | 代码source/device/dsp编译开关,当前适配snpe实现。| +|TNN_ATLAS_ENABLE| OFF | 代码source/device/atlas编译开关,当前适配华为atlas加速框架。| +|TNN_HUAWEI_NPU_ENABLE| OFF | 代码source/device/huawei_npu编译开关,当前适配HiAI加速框架。| +|TNN_RK_NPU_ENABLE| OFF | 代码source/device/rknpu编译开关,当前适配rknpu_ddk加速框架。| +|TNN_SYMBOL_HIDE| ON | 加速库符号隐藏,release发布默认非public接口符号不可见。| +|TNN_OPENMP_ENABLE| OFF | OpenMP开关,控制是否打开openmp加速。| +|TNN_BUILD_SHARED| ON | 动态库编译开关,关闭则编译静态库。| +|TNN_TEST_ENABLE| OFF | test代码编译开关| +|TNN_UNIT_TEST_ENABLE| OFF | unit test编译开关,打开unit test编译开关会自动打开TNN_CPU_ENABLE开关,作为测试基准。| +|TNN_PROFILER_ENABLE| OFF | 性能调试开关,打开后会打印更多性能信息,仅用于调试。| +|TNN_QUANTIZATION_ENABLE| OFF | 量化工具编译开关| +|TNN_BENCHMARK_MODE| OFF | benchmark开关,打开后支持model weights文件为空,可自动生成数据。| +|TNN_ARM82_SIMU| OFF | ARM82仿真开关,需要和TNN_ARM82_ENABLE同时打开,打开后可以在普通CPU上运行half实现代码。| + diff --git 
a/3rdparty/TNN/doc/cn/user/convert.md b/3rdparty/TNN/doc/cn/user/convert.md new file mode 100755 index 0000000..dfe2fea --- /dev/null +++ b/3rdparty/TNN/doc/cn/user/convert.md @@ -0,0 +1,648 @@ +# 模型转换介绍 + +[English Version](../../en/user/convert_en.md) + +
目前 TNN 支持业界主流的模型文件格式,包括ONNX、PyTorch、TensorFlow、TensorFlow-Lite 以及 Caffe 等。
+docker build -t tnn-convert:latest . +``` +docker 会根据 Dockerfile 文件进行构建,这需要等待一会。等构建完成之后,你可以通过下面的命令进行验证是否构建完成。 +``` shell script +docker images +``` +在输出的列表中会有下面类似的输出,这表明docker 的镜像已经构建好了。 +``` text +REPOSITORY TAG IMAGE ID CREATED SIZE +tnn-convert latest 9fb83110d2c9 26 minutes ago 2.79GB +``` + + + +### convert2tnn 工具进行转换 + +首先验证下 docker 镜像能够正常使用,首先我们通过下面的命令来看下 convert2tnn 的帮助信息: + +``` shell script +docker run -it tnn-convert:latest python3 ./converter.py -h +``` +如果docker 镜像是正确的话,你会得到下面的输出: +```text + +usage: convert [-h] {onnx2tnn,caffe2tnn,tf2tnn} ... + +convert ONNX/Tensorflow/Caffe model to TNN model + +positional arguments: + {onnx2tnn,caffe2tnn,tf2tnn} + onnx2tnn convert onnx model to tnn model + caffe2tnn convert caffe model to tnn model + tf2tnn convert tensorflow model to tnn model + tflite2tnn convert tensorflow-lite model to tnn model + +optional arguments: + -h, --help show this help message and exit +``` +从上面的帮助信息中,我们可以得知,目前 convert2tnn 提供了 3 种模型格式的转换支持。假设我们这里想将 TensorFlow 模型转换成 TNN 模型,我们输入下面的命令继续获得帮助信息: + +``` shell script +docker run -it tnn-convert:latest python3 ./converter.py tf2tnn -h +``` +得到的输出信息如下: +``` text +usage: convert tf2tnn [-h] -tp TF_PATH -in input_info [input_info ...] -on output_name [output_name ...] [-o OUTPUT_DIR] [-v v1.0] [-optimize] [-half] [-align] [-input_file INPUT_FILE_PATH] + [-ref_file REFER_FILE_PATH] + +optional arguments: + -h, --help show this help message and exit + -tp TF_PATH the path for tensorflow graphdef file + -in input_info [input_info ...] + specify the input name and shape of the model. e.g., -in input1_name:1,128,128,3 input2_name:1,256,256,3 + -on output_name [output_name ...] + the tensorflow model's output name. e.g. -on output_name1 output_name2 + -o OUTPUT_DIR the output tnn directory + -v v1.0 the version for model + -optimize If the model has fixed input shape, use this option to optimize the model for speed. On the other hand, if the model has dynamic input shape, dont use this option. 
It may cause warong result + -half save the model using half + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference data to compare the results. +``` +通过上面的输出,可以发现针对 TF 模型的转换,convert2tnn 工具提供了很多参数,我们一次对下面的参数进行解释: + +- tp 参数(必须) + 通过 “-tp” 参数指定需要转换的模型的路径。目前只支持单个 TF模型的转换,不支持多个 TF 模型的一起转换。 +- in 参数(必须) + 通过 “-in” 参数指定模型输入,例如:-in input_name_1:1,128,128,3 input_name_2:1,256,256,3。 +- on 参数(必须) + 通过 “-on” 参数指定模型输出的名称,例如: -on output_name1 output_name2 +- output_dir 参数: + 可以通过 “-o ” 参数指定输出路径,但是在 docker 中我们一般不使用这个参数,默认会将生成的 TNN 模型放在当前和 TF 模型相同的路径下。 +- optimize 参数(可选) + 可以通过 “-optimize” 参数来对模型进行优化,**对于固定输入维度的模型,我们强烈建议你开启这个选项,对于动态可变输入维度的模型则关闭这个选项,否则可能在维度变化时造成结果错误或者运行报错**。 +- v 参数(可选) + 可以通过 -v 来指定模型的版本号,以便于后期对模型进行追踪和区分。 +- half 参数(可选) + 可以通过 -half 参数指定,模型数据通过 FP16 进行存储,减少模型的大小,默认是通过 FP32 的方式进行存储模型数据的。 +- align 参数(可选) + 可以通过 -align 参数指定转换得到的 TNN 模型和原模型对齐的模式,确定 TNN 模型是否转换成功。例如:不使用 “-align” 参数,默认不进行对齐;如果只对比 TNN 模型和原模型最后一层的输出,可以使用命令 “-align” 或 “-align output”; 如果模型不对齐,可以使用命令 “-align all” 进行逐层对齐,并输出第一层不对齐层的信息。(TensorFlow Lite 模型暂时不支持 “-align all”)。__align 只支持 FP32 模型的校验,所以使用 align 的时候不能使用 half__ +- input_file 参数(可选) + 可以通过 -input_file 参数指定模型对齐所需要的输入文件的名称,输入需要遵循如下[格式](#输入)。生成输入的代码可以[参考](#生成输入或输出文件示例代码)。 +- ref_file 参数(可选) + 可以通过 -ref_file 参数指定待对齐的输出文件的名称,输出需遵循如下[格式](#输出)。生成输出的代码可以[参考](#生成输入或输出文件示例代码)。 + + +**当前 convert2tnn 的模型只支持 graphdef 模型,不支持 checkpoint 以及 saved_model 格式的文件,如果想将 checkpoint 或者 saved_model 的模型进行转换,可以参看下面[tf2tnn](./tf2tnn.md)的部分,自行进行转换。** + +下面我们通过一个例子来展示如何将 TF 模型转换到 TNN 模型, + +``` shell script +docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py tf2tnn \ + -tp /workspace/test.pb \ + -in "input0:1,32,32,3 input2:1,32,32,3" \ + -on output0 output1 \ + -v v2.0 \ + -optimize \ + -align \ + -input_file /workspace/in.txt \ + -ref_file /workspace/ref.txt +``` + 
+由于 convert2tnn工具是部署在 docker 镜像中的,如果要进行模型的转换,需要先将模型传输到 docker 容器中。我们可以通过 docker run 的参数--volume 将包含模型的模型挂载到 docker 容器的某个路径下。上面的例子中是将执行shell 的当前目录(pwd)挂载到 docker 容器中的 "/workspace” 文件夹下面。当然了测试用到的test.pb 也**必须执行 shell 命令的当前路径下**。执行完成上面的命令后,convert2tnn 工具会将生成的 TNN 模型存放在 test.pb文件的同一级目录下,当然了生成的文件也就是在当前目录下。 + +上面的文档中只是介绍了 TensorFlow 的模型的转换,其他模型的使用也是类似的,可以自行通过转换工具的帮助信息的提醒进行使用,我这里不在对这些转换命令进行详细的说明,只是简单的将这些转换命令列出来,你可以仿照着进行转换。 + +``` shell script +# convert onnx +docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py onnx2tnn \ + /workspace/mobilenetv3-small-c7eb32fe.onnx \ + -optimize \ + -v v3.0 \ + -align output \ + -input_file /workspace/in.txt \ + -ref_file /workspace/ref.txt + +# convert caffe +docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py caffe2tnn \ + /workspace/squeezenet.prototxt \ + /workspace/squeezenet.caffemodel \ + -optimize \ + -v v1.0 \ + -align \ + -input_file /workspace/in.txt \ + -ref_file /workspace/ref.txt + +# convert tflite +docker run --volume=$(pwd):/workspace -it tnn-convert:latest python3 ./converter.py tflite2tnn \ + /workspace/mobilenet_v1_1.0_224.tflite \ + -v v1.0 \ + -align \ + -input_file /workspace/in.txt \ + -ref_file /workspace/ref.txt + + +``` + +## Convert2tnn 手动安装 +如果你不想使用 docker 镜像的方式,也可以在自己的开发机上安装 convert2tnn 的依赖工具,并根据相关的说明进行编译,也可以同样使用 convert2tnn 工具机型模型转换。 + +convert2tnn 的完整环境搭建包含下面的所有的工具的安装和编译。如果你只想转换某一类的模型,你只需要安装转换对应模型转换的依赖工具。例如你只想转换 caffe 的模型,你就不需要安装 转换 TensorFlow 模型依赖的工具。同理你需要转换 TensorFlow 的模型,就可以不用安装 Caffe 模型转换的依赖工具。但是 ONNX 模型依赖工具和安装和编译都是必须的。 + +针对 Linux 系统下的环境配置,我使用 Centos 7.2 为例,Ubuntu 系统也可以适用,只要将相应的安装命令修改为 Ubuntu 上的对应命令即可。 + +### 环境搭建及编译 +#### 1. 
ONNX模型转换工具搭建(必须) +- 安装protobuf(version >= 3.4.0) +Macos: +```shell script +brew install protobuf +``` + +- 安装python (version >=3.6) +Macos +```shell script +brew install python3 +``` +centos: +```shell script +yum install python3 python3-devel +``` +- 安装 python 依赖库 +onnx=1.6.0 +onnxruntime>=1.1.0 +numpy>=1.17.0 +onnx-simplifier>=0.2.4 +protobuf>=3.4.0 +requests +```shell script +pip3 install onnx==1.6.0 onnxruntime numpy onnx-simplifier protobuf requests +``` + +- cmake (version >= 3.0) +从的官网下载最新版本的 cmake,然后按照文档安装即可。建议使用最新版本的 cmake。 + +##### 编译 +onnx2tnn 工具在 Mac 以及 Linux 上有自动编译脚本直接运行就可以。 + ```shell script +cd /tools/convert2tnn +./build.sh + ``` + +#### 2. TensorFlow 模型转换(可选) + + +- tensorflow (version == 1.15.0) +建议使用 TensorFlow 1.15.0 的版本,目前 TensorFlow 2.+ 的版本的兼容性不好, 不建议使用。 +```shell script +pip3 install tensorflow==1.15.0 +``` + +- tf2onnx (version>= 1.5.5) +```shell script +pip3 install tf2onnx +``` +- onnxruntime(version>=1.1.0) +```shell script +pip3 install onnxruntime +``` + +#### 3. Caffe 模型转换(可选) + +- 安装protobuf(version >= 3.4.0) + +Macos: +```shell script +brew install protobuf +``` + +Linux: + +对于 linux 系统,我们建议参考 protobuf 的官方[README](https://github.com/protocolbuffers/protobuf/blob/master/src/README.md)文档,直接从源码进行安装。 + +如果你使用的是 Ubuntu 系统可以使用下面的指令进行安装: +```shell script +sudo apt-get install libprotobuf-dev protobuf-compiler +``` + +- 安装python (version >=3.6) + +Macos +```shell script +brew install python3 +``` +centos: +```shell script +yum install python3 python3-devel +``` + +- onnx(version == 1.6.0) +```shell script +pip3 install onnx==1.6.0 +``` + +- numpy(version >= 1.17.0) +```shell script +pip3 install numpy +``` + +#### convert2tnn 工具的使用 +配置后上面的环境依赖之后,就可以使用 convert2tnn 进行相应模型的转换 + +```shell script +cd /tools/convert2tnn/ +python3 converter.py -h +``` +执行上面的命令会打印下面的信息。目前 convert2tnn 提供了三个子命令,分别对相应的模型进行转换。 + +```text +usage: convert [-h] {onnx2tnn,caffe2tnn,tf2tnn} ... 
+ +convert ONNX/Tensorflow/Caffe model to TNN model + +positional arguments: + {onnx2tnn,caffe2tnn,tf2tnn} + onnx2tnn convert onnx model to tnn model + caffe2tnn convert caffe model to tnn model + tf2tnn convert tensorflow model to tnn model + +optional arguments: + -h, --help show this help message and exit +``` +- ONNX模型转换 +如果想相对 ONNX 模型进行转换,可以直接使用 onnx2tnn 的子命令来查看帮助信息。 + +```shell script +python3 converter.py onnx2tnn -h +``` +usage 信息如下: +```text +usage: convert onnx2tnn [-h] [-in input_info [input_info ...]] [-optimize] + [-half] [-v v1.0.0] [-o OUTPUT_DIR] [-align] + [-input_file INPUT_FILE_PATH] + [-ref_file REFER_FILE_PATH] [-debug] + onnx_path + +positional arguments: + onnx_path the path for onnx file + +optional arguments: + -h, --help show this help message and exit + -in input_info [input_info ...] + specify the input name and shape of the model. e.g., + -in input1_name:1,3,128,128 input2_name:1,3,256,256 + -optimize If the model has fixed input shape, use this option to optimize the model for speed. On the other hand, if the model has dynamic input shape, dont use this option. It may cause warong result + -half save model using half + -v v1.0.0 the version for model + -o OUTPUT_DIR the output tnn directory + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for + the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference + data to compare the results. + -debug Turn on the switch to debug the model. +``` +示例: +```shell script +python3 converter.py onnx2tnn \ + ~/mobilenetv3/mobilenetv3-small-c7eb32fe.onnx.onnx \ + -optimize \ + -v=v3.0 \ + -o ~/mobilenetv3/ \ + -align \ + -input_file in.txt \ + -ref_file ref.txt +``` + +- caffe2tnn + +Caffe 格式转换 + +目前 convert2tnn 的工具目前只支持最新版本的 Caffe 的文件格式,所以如果想将 Caffe 模型转换为 TNN 模型。需要先将老版本的 Caffe 网络和模型转换为新版. Caffe 自带了工具可以把老版本的 + +Caffe 网络和模型转换为新版本的格式. 
具体的使用方式如下: +```shell script +upgrade_net_proto_text [老prototxt] [新prototxt] +upgrade_net_proto_binary [老caffemodel] [新caffemodel] +``` +修改后的输入的格式如下所示: + +```text +layer { + name: "data" + type: "input" + top: "data" + input_param { shape: { dim: 1 dim: 3 dim: 224 dim: 224 } } +} +``` + + +```shell script +python3 converter.py caffe2tnn -h +``` +usage 信息如下: +```text +usage: convert caffe2tnn [-h] [-o OUTPUT_DIR] [-v v1.0] [-optimize] [-half] + prototxt_file_path caffemodel_file_path + +positional arguments: + prototxt_file_path the path for prototxt file + caffemodel_file_path the path for caffemodel file + +optional arguments: + -h, --help show this help message and exit + -o OUTPUT_DIR the output tnn directory + -v v1.0 the version for model, default v1.0 + -optimize If the model has fixed input shape, use this option to optimize the model for speed. On the other hand, if the model has dynamic input shape, dont use this option. It may cause warong result + -half save model using half + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for + the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference + data to compare the results. +``` +示例: +```shell script +python3 converter.py caffe2tnn \ + ~/squeezenet/squeezenet.prototxt \ + ~/squeezenet/squeezenet.caffemodel \ + -optimize \ + -v v1.0 \ + -o ~/squeezenet/ \ + -align \ + -input_file in.txt \ + -ref_file ref.txt +``` +- tensorflow2tnn + +当前 convert2tnn 的模型只支持 graphdef 模型,不支持 checkpoint 以及 saved_model 格式的文件,如果想将 checkpoint 或者 saved_model 的模型进行转换,可以参看下面的 tf2onnx 的部分,自行进行转换。 + +``` shell script +python3 converter.py tf2tnn -h +``` +usage 信息如下: +```text +usage: convert tf2tnn [-h] -tp TF_PATH -in input_info [input_info ...] -on output_name [output_name ...] 
[-o OUTPUT_DIR] [-v v1.0] [-optimize] [-half] [-align] [-input_file INPUT_FILE_PATH] + [-ref_file REFER_FILE_PATH] + +optional arguments: + -h, --help show this help message and exit + -tp TF_PATH the path for tensorflow graphdef file + -in input_info [input_info ...] + specify the input name and shape of the model. e.g., -in input1_name:1,128,128,3 input2_name:1,256,256,3 + -on output_name [output_name ...] + the tensorflow model's output name. e.g. -on output_name1 output_name2 + -o OUTPUT_DIR the output tnn directory + -v v1.0 the version for model + -optimize If the model has fixed input shape, use this option to optimize the model for speed. On the other hand, if the model has dynamic input shape, dont use this option. It may cause warong result + -half save the mode using half + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference data to compare the results. +``` +- tensorflow-lite2tnn + +当前 tensorflow-lite2tnn 的转换支持tflite格式文件,从而方便移动端部署。 + +``` shell script +python3 converter.py tflite2tnn -h +``` +usage 信息如下: +``` +usage: convert tflite2tnn [-h] TF_PATH [-o OUTPUT_DIR] [-v v1.0] [-align] + +optional arguments: + -h, --help show this help message and exit + TF_PATH the path for tensorflow-lite graphdef file + -o OUTPUT_DIR the output tnn directory + -v v1.0 the version for model + -align align the onnx model with tnn model + -input_file INPUT_FILE_PATH + the input file path which contains the input data for + the inference model. + -ref_file REFER_FILE_PATH + the reference file path which contains the reference + data to compare the results. 
+``` +示例: +```shell script +python3 converter.py tflite2tnn \ + ~/tf-model/test.tflite \ + -o ~/tf-model/ \ + -align \ + -input_file in.txt \ + -ref_file ref.txt +``` + +## 输入输出文件格式示例 +### 输入 +```text + +输入数量 +输入名称 shape维度个数 具体shape信息 输入数据类型 +输入数据 +输入名称 shape维度个数 具体shape信息 输入数据类型 +输入数据 +...... + +例如 + 2 + in0 4 1 3 1 1 3 + 2 + 4 + 3 + in1 4 1 2 2 1 0 + 0.1 + 0.2 + 0.3 + 0.4 + + +提示: +如果输入数据是 float, 输入数据类型可以用 0 表示 +如果输入数据是 int , 输入数据类型可以用 3 表示 + +``` + +### 输出 +```text + +输出数量 +输出名称 shape维度个数 具体shape信息 输出数据类型 +输出数据 +输出名称 shape维度个数 具体shape信息 输出数据类型 +输出数据 +...... + +例如 + 2 + out0 2 1 3 0 + 0.1 + 0.2 + 0.3 + out1 4 1 2 2 1 0 + 0.1 + 0.2 + 0.3 + 0.4 + + +提示: +如果输出数据是 float, 输出数据类型可以用 0 表示 +如果输出数据是 int , 输出数据类型可以用 3 表示 + +``` + +### 生成输入或输出文件示例代码 +```python +def write_pytorch_data(output_path, data, data_name_list): + """ + Save the data of Pytorch needed to align TNN model. + + The input and output names of pytorch model and onnx model may not match, + you can use Netron to visualize the onnx model to determine the data_name_list. + + The following example converts ResNet50 to onnx model and saves input and output: + >>> from torchvision.models.resnet import resnet50 + >>> model = resnet50(pretrained=False).eval() + >>> input_data = torch.randn(1, 3, 224, 224) + >>> input_names, output_names = ["input"], ["output"] + >>> torch.onnx.export(model, input_data, "ResNet50.onnx", input_names=input_names, output_names=output_names) + >>> with torch.no_grad(): + ... output_data = model(input_data) + ... + >>> write_pytorch_data("input.txt", input_data, input_names) + >>> write_pytorch_data("output.txt", output_data, output_names) + + :param output_path: Path to save data. + :param data: The input or output data of Pytorch model. + :param data_name_list: The name of input or output data. You can get it after visualization through Netron. 
+ :return: + """ + + if type(data) is not list and type(data) is not tuple: + data = [data, ] + assert len(data) == len(data_name_list), "The number of data and data_name_list are not equal!" + with open(output_path, "w") as f: + f.write("{}\n" .format(len(data))) + for name, data in zip(data_name_list, data): + data = data.numpy() + shape = data.shape + description = "{} {} ".format(name, len(shape)) + for dim in shape: + description += "{} ".format(dim) + data_type = 0 if data.dtype == np.float32 else 3 + fmt = "%0.6f" if data_type == 0 else "%i" + description += "{}".format(data_type) + f.write(description + "\n") + np.savetxt(f, data.reshape(-1), fmt=fmt) + + +def write_tensorflow_data(output_path, data, data_name_list, data_usage=1): + """ + Save the data of TensorFlow needed to align TNN model. + + :param output_path: Path to save data. "You should use input.txt or output.txt to name input or output data" + :param data: The input or output data of TensorFlow model. + :param data_name_list: The name of input or output data. You can get it after visualization through Netron. + :param data_usage: Specify the data usage. If the data is input data, data_usage=0; + if the data is output data, data_usage=1. + :return: + """ + def convert_nhwc(data): + assert len(data.shape) <= 4 + if len(data.shape) == 2: + return data + orders = (0, 2, 1) if len(data.shape) == 3 else (0, 2, 3, 1) + return data.transpose(orders) + + if type(data) is not list and type(data) is not tuple: + data = [data, ] + assert len(data) == len(data_name_list), "The number of data and data_name_list are not equal!" 
+ with open(output_path, "w") as f: + f.write("{}\n" .format(len(data))) + for name, data in zip(data_name_list, data): + data = convert_nhwc(data) if data_usage == 0 else data + shape = data.shape + description = "{} {} ".format(name, len(shape)) + for dim in shape: + description += "{} ".format(dim) + data_type = 0 if data.dtype == np.float32 else 3 + fmt = "%0.6f" if data_type == 0 else "%i" + description += "{}".format(data_type) + f.write(description + "\n") + np.savetxt(f, data.reshape(-1), fmt=fmt) + + +``` + + +## 模型转换详细介绍 +convert2tnn 只是对多种模型转换的工具的封装,根据第一部分 “模型转换介绍”中原理说明,你也可以先将原始模型转换成 ONNX,然后再将 ONNX 模型转换成 TNN 模型。我们提供了如何手动的将 Caffe、PyTorch、TensorFlow 模型转换成 ONNX 模型,然后再将 ONNX 模型转换成 TNN 模型的文档。如果你在使用 convert2tnn 转换工具遇到问题时,我们建议你了解下相关的内容,这有可能帮助你更加顺利的进行模型转换。 + +- [onnx2tnn](onnx2tnn.md) +- [pytorch2tnn](onnx2tnn.md) +- [tf2tnn](tf2tnn.md) +- [caffe2tnn](caffe2tnn.md) +- [tflite2tnn](tflite2tnn.md) + diff --git a/3rdparty/TNN/doc/cn/user/demo.md b/3rdparty/TNN/doc/cn/user/demo.md new file mode 100644 index 0000000..1613dcc --- /dev/null +++ b/3rdparty/TNN/doc/cn/user/demo.md @@ -0,0 +1,591 @@ +# Demo 代码介绍 + +[English Version](../../en/user/demo_en.md) + +## 一、iOS Demo 介绍 + +### Demo运行步骤 + +1. 下载Demo模型 + + ``` + cd /model + sh download_model.sh + ``` + + 可选:如果需要执行OCR demo,还需要准备opencv库。可以使用提供的脚本下载opencv。 + ``` + cd /scripts + sh download_opencv.sh iOS + ``` + + PS: 如因网络问题脚本无法下载模型或依赖库,请根据脚本中的信息手动创建对应文件夹并自行下载 + +2. 打开TNNExamples工程 + + 进入目录`/examples/ios/`,双击打开TNNExamples工程。 + + 可选:如果需要执行OCR demo,需要将opencv加入TNNExamples的依赖项中。 + + 如下图点击TNNExamples工程,找到工程设置`General`,在`Framworks, Libraries, and Embedded Content`选项卡下点击`+`。 + +
+ + 在打开的界面中选择`Add Other-Add Files...`,找到opencv2.framework,并添加。使用提供的`/scripts/download_opencv.sh`时,下载的opencv位于`/third_party/opencv/iOS`目录下。 + +
+ + 由于opencv2.framework中包含真机和模拟器多平台的代码,需要按下图将`Embed`选项设置为`Do Not Embed`。 + +
+ + 最后,为了确保编译器可以找到opencv.framework,需要确认opencv.framework所在目录被添加到`Framework Search Paths`中。如下图所示,找到工程设置`Build Settings`,在`Search Paths`选项卡下找到`Framework Search Paths`。如果opencv.framework所在的目录不存在,需要双击这一条目,并添加。 + +
+ + +3. 设置开发者账号 + + 如下图点击TNNExamples工程,找到工程设置`Signing & Capabilities`,点击Team选项卡选择`Add an Account...` + +
+ + 在如下界面输入Apple ID账号和密码,添加完成后回到`Signing & Capabilities`界面,并在Team选项卡中选中添加的账号。如果没有Apple ID也可以通过`Create Apple ID`选项根据相关提示进行申请。 + + `PS:申请Apple ID无需付费,可以即时通过,通过后才可在真机上运行APP调试` + +
+ +4. 真机运行 + + 4.1 修改`Bundle Identitifier` + + 如图在现有`Bundle Identifier`后随机添加后缀(限数字和字母),避免个人账户遇到签名冲突。 + +
+ +4.2 验证授权 + +首次运行先利用快捷键`Command + Shift + K`对工程进行清理,再执行快捷键`Command + R`运行。如果是首次登陆Apple ID,Xcode会弹框报如下错误,需要在iOS设备上根据提示进行授权验证。一般来说手机上的授权路径为:设置 -> 通用 -> 描述文件与设备管理 -> Apple Development选项 -> 点击信任 + +
+ +4.3 运行结果 + +首次运行先利用快捷键`Command + Shift + K`对工程进行清理,再执行快捷键`Command + R`运行。默认界面为人脸检测,可以点击右上角编辑按钮切换图像分类等不同功能。 + +PS: + +a) 由于GPU和CPU加速原理不同,具体模型的GPU性能不一定比CPU高,与具体机型、模型结构以及工程实现有关。欢迎大家参与到TNN开发中,共同进步。 + +b) tnn_sdk_sample.h中的宏TNN_SDK_USE_NCNN_MODEL默认为0,运行TNN模型,可以设置为1来运行ncnn模型。 + + c) 如遇到`Unable to install...`错误提示,请在真机设备上删除已有的TNNExamples,重新运行安装。 + + d) 真机运行时,如果遇到CodeSign错误`Command CodeSign failed with a nonzero exit code`,可参看issue20 `iOS Demo运行步骤说明` + +c) 如果需要执行OCR demo,需要将tnn_sdk_sample.h中的宏HAS_OPENCV设置为1,否则不会编译OCR demo代码。 + +### Demo运行效果 + +1. 人脸检测 + + 模型来源:https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB + + 效果示例:iPhone 7, ARM 单线程 6.3206ms + +
+ +2. 图像分类 + + 模型来源:https://github.com/forresti/SqueezeNet + + 效果示例:iPhone 7, ARM 单线程 13.83ms + +
+ +## 二、Android Demo 介绍 + +### 运行环境要求 + +1. Android Studio 3.5 或以上 +2. NDK version >= 18, <= 21 +NDK 22和23在链接第三方动态库可能会出错,例如opencv,hiai,不建议使用。 + +### 运行步骤 + +1. 下载Demo模型 + + ``` + cd /model + sh download_model.sh + ``` + + 可选:如果需要执行OCR demo,还需要下载opencv库。 + ``` + cd /scripts + sh download_opencv.sh android + ``` + + PS: + + 如因网络问题脚本无法下载模型,请根据脚本中的信息手动创建对应文件夹并自行下载. + + 想要使用NPU运行demo需要需首先下载NPU ddk。详情参考: [FAQ](../faq.md): 创建华为NPU编译环境。 + +2. 打开TNNExamples工程 + + - 进入目录`/examples/android/`,双击打开TNNExamples工程文件`build.gradle`。 + + - 将手机连接到电脑,点击`Run Demo`编译和运行demo。 + + - 工程默认编译64位armv8库,如要添加32位armv7库,可在`build.gradle`中修改为`abiFilters "armeabi-v7a", "arm64-v8a"`。 + + PS : + + 1). 想要使用NPU, 打开工程后,需要手动设置打开NPU: + 在/examples/android/demo/CMakeList.txt中, 更新指令为如下,使用华为NPU。 + ```` + set(TNN_HUAWEI_NPU_ENABLE ON CACHE BOOL "" FORCE) + ```` + 2). 第一次运行如果遇到 `/examples/android/src/main/jni/thirdparty/hiai_ddk/include/graph`Permission Denied 的情况, + Clean Project 再重新运行。 + + 3). 当前只有rom版本 >= 100.320.xxx.xxxx的华为机型支持IR构建事例模型。参考:[FAQ](../faq.md): 更新到最新的ROM支持NPU。 + + 4). 运行demo需要需首先下载NPU DDK。参考: [FAQ](../faq.md): 创建华为NPU编译环境。 + + 5). 想要执行OCR demo, 打开工程后,需要手动设置打开OPENCV依赖: + 在/examples/android/demo/CMakeList.txt中, 更新指令为如下,使用OPENCV。 + ```` + set(TNN_OPENCV_ENABLE ON CACHE BOOL "" FORCE) + ```` + + 如果通过上述`download_opencv.sh`下载OpenCV库,不需要再指定路径。 + 如果想要使用自定义的OpenCV Android SDK,需要指定OPENCV_ANDROID_SDK_PATH路径。 + 在/examples/android/demo/CMakeList.txt中, 更新指令为如下。 + ```` + set(OPENCV_ANDROID_SDK_PATH ) + ```` + + +### 运行效果 +1. 人脸检测-图片 + + 模型来源:https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB + + 效果示例:华为P30, ARM 单线程 32.2359ms + +
+ + 效果示例: 华为P30, 华为NPU rom 100.320.010.022 9.04ms + +
+ + +2. 人脸检测-视频 + 模型来源:https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB + + 效果示例:华为P30, ARM 单线程 122.296ms + +
+ + 效果示例: 华为P30, 华为NPU rom 100.320.010.022 28ms + +
+ +3. 图像分类 + + 模型来源:https://github.com/forresti/SqueezeNet + + 效果示例:华为P30, ARM 单线程 81.4047ms + +
+ + 效果示例: 华为P30, NPU rom 100.320.010.022 2.48ms + +
+ +## 三、Linux/Mac/Windows/ArmLinux/CudaLinux Demo 介绍 +### 功能 +* 快速在 Linux/Mac/Windows/ArmLinux/CudaLinux 环境下运行模型,展示 TNN 接口的使用方法。 + +### 使用步骤 +#### 1. 下载 Demo 模型 + ``` + cd /model + sh download_model.sh + ``` + 如因网络问题脚本无法下载模型,请根据脚本中的信息手动创建对应文件夹并自行下载 + +#### 2. 编译 +##### Linux +* 环境要求 + - Cmake (>=3.11) + - OpenCV3 (只有webcam的demo会用), 可在CMake中通过find_package(OpenCV 3) 成功找到依赖项。 + + ``` + // 手动编译OpenCV3 + wget https://github.com/opencv/opencv/archive/3.4.13.zip + unzip 3.4.13.zip + cd opencv-3.4.13 + + mkdir build + mkdir install + cd build + + cmake -DCMAKE_INSTALL_PREFIX=../install .. + make -j4 + make install + + // 在CMakeList.txt的find_packpage之前添加OpenCV路径 + // 例如,进入examples/linux/x86,打开CMakeList.txt + // 在find_package(OpenCV 3 REQUIRED)之前添加 + set(OpenCV_DIR /opencv-3.4.13/install/share/OpenCV) + ``` + +* 编译 + 进入 `examples/linux/x86` 目录,执行 `build_linux_native.sh`或`build_linux_openvino.sh`。前者使用TNN实现的优化X86后端执行,后者基于Intel OpenVINO后端执行。以`build_linux_native.sh`为例,默认仅编译处理图像的demo,如需编译基于摄像头的人脸配准demo,需要将`build_linux_native.sh`中的"-DTNN_DEMO_WITH_WEBCAM=OFF"修改为"-DTNN_DEMO_WITH_WEBCAM=ON": + ``` + cd /examples/linux/x86 + ./build_linux_native.sh + ``` +* 执行 + 进入 `examples/linux/x86/build_linux_native` 或 `examples/linux/x86/build_linux_openvino` 目录,当不使用任何参数执行demo文件时,会打印demo用法信息,以图形分类demo为例: + ``` + cd build_linux_native + ./demo_x86_imageclassify + >Parameter -m and -p should be set + >usage: + >./demo_x86_imageclassify [-h] [-p] tnnproto [-m] tnnmodel [-i] + > -h, print a usage message. + > -p, (required) tnn proto file path + > -m, (required) tnn model file path + > -i, (required) input file path + > -l,